Merge pull request #1922 from Unity-Technologies/release-v08-slowflag

eshvk · web-flow · commit 30a573898100 · 2019-04-12T09:48:38.000-07:00
Fix '--slow' flag after environment updates
diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py
@@ -33,8 +33,7 @@ def __init__(self,
                  seed: int = 0,
                  docker_training: bool = False,
                  no_graphics: bool = False,
-                 timeout_wait: int = 30,
-                 train_mode: bool = True):
+                 timeout_wait: int = 30):
         """
         Starts a new unity environment and establishes a connection with the environment.
         Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
@@ -56,7 +55,6 @@ def __init__(self,
         self._loaded = False  # If true, this means the environment was successfully loaded
         self.proc1 = None  # The process that is started. If None, no process was started
         self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
-        self._train_mode = train_mode
 
         # If the environment name is None, a new environment will not be launched
         # and the communicator will directly try to connect to an existing unity environment.
@@ -245,7 +243,7 @@ def __str__(self):
                                                    for k in self._resetParameters])) + '\n' + \
                '\n'.join([str(self._brains[b]) for b in self._brains])
 
-    def reset(self, config=None, train_mode=None, custom_reset_parameters=None) -> AllBrainInfo:
+    def reset(self, config=None, train_mode=True, custom_reset_parameters=None) -> AllBrainInfo:
         """
         Sends a signal to reset the unity environment.
         :return: AllBrainInfo  : A data structure corresponding to the initial reset state of the environment.
@@ -265,11 +263,6 @@ def reset(self, config=None, train_mode=None, custom_reset_parameters=None) -> A
                 raise UnityEnvironmentException(
                     "The parameter '{0}' is not a valid parameter.".format(k))
 
-        if train_mode is None:
-            train_mode = self._train_mode
-        else:
-            self._train_mode = train_mode
-
         if self._loaded:
             outputs = self.communicator.exchange(
                 self._generate_reset_input(train_mode, config, custom_reset_parameters)
diff --git a/ml-agents-envs/mlagents/envs/tests/test_subprocess_unity_environment.py b/ml-agents-envs/mlagents/envs/tests/test_subprocess_unity_environment.py
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
@@ -76,8 +76,7 @@ def run_training(sub_id: int, run_seed: int, run_options, process_queue):
         docker_target_name,
         no_graphics,
         run_seed,
-        base_port + (sub_id * num_envs),
-        fast_simulation
+        base_port + (sub_id * num_envs)
     )
     env = SubprocessUnityEnvironment(env_factory, num_envs)
     maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
@@ -87,7 +86,7 @@ def run_training(sub_id: int, run_seed: int, run_options, process_queue):
                            save_freq, maybe_meta_curriculum,
                            load_model, train_model,
                            keep_checkpoints, lesson, env.external_brains,
-                           run_seed)
+                           run_seed, fast_simulation)
 
     # Signal that environment has been launched.
     process_queue.put(True)
@@ -156,8 +155,7 @@ def create_environment_factory(
         docker_target_name: str,
         no_graphics: bool,
         seed: Optional[int],
-        start_port: int,
-        fast_simulation: bool
+        start_port: int
 ) -> Callable[[int], BaseUnityEnvironment]:
     if env_path is not None:
         # Strip out executable extensions if passed
@@ -191,8 +189,7 @@ def create_unity_environment(worker_id: int) -> UnityEnvironment:
             seed=env_seed,
             docker_training=docker_training,
             no_graphics=no_graphics,
-            base_port=start_port,
-            train_mode=(not fast_simulation)
+            base_port=start_port
         )
     return create_unity_environment
 
diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py
@@ -51,7 +51,8 @@ def test_run_training(load_config, create_environment_factory, subproc_env_mock)
                 5,
                 0,
                 subproc_env_mock.return_value.external_brains,
-                0
+                0,
+                True
             )
 
 
diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_controller.py b/ml-agents/mlagents/trainers/tests/test_trainer_controller.py
@@ -152,14 +152,15 @@ def basic_trainer_controller(brain_info):
         keep_checkpoints=False,
         lesson=None,
         external_brains={'testbrain': brain_info},
-        training_seed=99
+        training_seed=99,
+        fast_simulation=True
     )
 
 @patch('numpy.random.seed')
 @patch('tensorflow.set_random_seed')
 def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
     seed = 27
-    TrainerController('', '', '1', 1, None, True, False, False, None, {}, seed)
+    TrainerController('', '', '1', 1, None, True, False, False, None, {}, seed, True)
     numpy_random_seed.assert_called_with(seed)
     tensorflow_set_seed.assert_called_with(seed)
 
diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_metrics.py b/ml-agents/mlagents/trainers/tests/test_trainer_metrics.py
diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py
@@ -15,7 +15,7 @@
 from mlagents.envs import AllBrainInfo, BrainParameters
 from mlagents.envs.base_unity_environment import BaseUnityEnvironment
 from mlagents.envs.exception import UnityEnvironmentException
-from mlagents.trainers import Trainer, Policy
+from mlagents.trainers import Trainer
 from mlagents.trainers.ppo.trainer import PPOTrainer
 from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
 from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
@@ -34,7 +34,8 @@ def __init__(self,
                  keep_checkpoints: int,
                  lesson: Optional[int],
                  external_brains: Dict[str, BrainParameters],
-                 training_seed: int):
+                 training_seed: int,
+                 fast_simulation: bool):
         """
         :param model_path: Path to save the model.
         :param summaries_dir: Folder to save training summaries.
@@ -66,6 +67,7 @@ def __init__(self,
         self.meta_curriculum = meta_curriculum
         self.seed = training_seed
         self.training_start_time = time()
+        self.fast_simulation = fast_simulation
         np.random.seed(self.seed)
         tf.set_random_seed(self.seed)
 
@@ -186,9 +188,9 @@ def _reset_env(self, env: BaseUnityEnvironment):
             environment.
         """
         if self.meta_curriculum is not None:
-            return env.reset(config=self.meta_curriculum.get_config())
+            return env.reset(train_mode=self.fast_simulation, config=self.meta_curriculum.get_config())
         else:
-            return env.reset()
+            return env.reset(train_mode=self.fast_simulation)
 
     def start_learning(self, env: BaseUnityEnvironment, trainer_config):
         # TODO: Should be able to start learning at different lesson numbers

Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,8 @@ def test_run_training(load_config, create_environment_factory, subproc_env_mock)`
`51`	`51`	`5,`
`52`	`52`	`0,`
`53`	`53`	`subproc_env_mock.return_value.external_brains,`
`54`		`- 0`
	`54`	`+ 0,`
	`55`	`+ True`
`55`	`56`	`)`
`56`	`57`
`57`	`58`