Fix training not quitting when play button is unchecked (#2376)

Ervin T · Jonathan Harper · commit 52c004b7c683 · 2019-08-01T14:40:00.000-07:00
This fixes an issue where stopping the game when training in the Editor won't end training, due to the new asynchronous SubprocessEnvManager changes.  Another minor change was made to move the `env_manager.close()` in TrainerController to the end of `start_learning` so that we are more likely to save the model if something goes wrong during the environment shutdown (this occurs sometimes on Windows machines).
diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py
@@ -11,6 +11,7 @@
 from .brain import AllBrainInfo, BrainInfo, BrainParameters
 from .exception import (
     UnityEnvironmentException,
+    UnityCommunicationException,
     UnityActionException,
     UnityTimeOutException,
 )
@@ -343,7 +344,7 @@ def reset(
                 self._generate_reset_input(train_mode, config, custom_reset_parameters)
             )
             if outputs is None:
-                raise KeyboardInterrupt
+                raise UnityCommunicationException("Communicator has stopped.")
             rl_output = outputs.rl_output
             s = self._get_state(rl_output)
             self._global_done = s[1]
@@ -570,7 +571,7 @@ def step(
             with hierarchical_timer("communicator.exchange"):
                 outputs = self.communicator.exchange(step_input)
             if outputs is None:
-                raise KeyboardInterrupt
+                raise UnityCommunicationException("Communicator has stopped.")
             rl_output = outputs.rl_output
             state = self._get_state(rl_output)
             self._global_done = state[1]
diff --git a/ml-agents-envs/mlagents/envs/exception.py b/ml-agents-envs/mlagents/envs/exception.py
@@ -19,6 +19,14 @@ class UnityEnvironmentException(UnityException):
     pass
 
 
+class UnityCommunicationException(UnityException):
+    """
+    Related to errors with the communicator.
+    """
+
+    pass
+
+
 class UnityActionException(UnityException):
     """
     Related to errors with sending actions.
diff --git a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py
@@ -2,6 +2,7 @@
 import cloudpickle
 
 from mlagents.envs import UnityEnvironment
+from mlagents.envs.exception import UnityCommunicationException
 from multiprocessing import Process, Pipe, Queue
 from multiprocessing.connection import Connection
 from queue import Empty as EmptyQueueException
@@ -47,14 +48,14 @@ def send(self, name: str, payload=None):
             cmd = EnvironmentCommand(name, payload)
             self.conn.send(cmd)
         except (BrokenPipeError, EOFError):
-            raise KeyboardInterrupt
+            raise UnityCommunicationException("UnityEnvironment worker: send failed.")
 
     def recv(self) -> EnvironmentResponse:
         try:
             response: EnvironmentResponse = self.conn.recv()
             return response
         except (BrokenPipeError, EOFError):
-            raise KeyboardInterrupt
+            raise UnityCommunicationException("UnityEnvironment worker: recv failed.")
 
     def close(self):
         try:
@@ -115,8 +116,9 @@ def _send_response(cmd_name, payload):
                 _send_response("global_done", env.global_done)
             elif cmd.name == "close":
                 break
-    except KeyboardInterrupt:
-        print("UnityEnvironment worker: keyboard interrupt")
+    except (KeyboardInterrupt, UnityCommunicationException):
+        print("UnityEnvironment worker: environment stopping.")
+        step_queue.put(EnvironmentResponse("env_close", worker_id, None))
     finally:
         step_queue.close()
         env.close()
@@ -171,6 +173,10 @@ def step(self) -> List[StepInfo]:
             try:
                 while True:
                     step = self.step_queue.get_nowait()
+                    if step.name == "env_close":
+                        raise UnityCommunicationException(
+                            "At least one of the environments has closed."
+                        )
                     self.env_workers[step.worker_id].waiting = False
                     if step.worker_id not in step_workers:
                         worker_steps.append(step)
diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py
@@ -14,7 +14,10 @@
 from mlagents.envs import BrainParameters
 from mlagents.envs.env_manager import StepInfo
 from mlagents.envs.env_manager import EnvManager
-from mlagents.envs.exception import UnityEnvironmentException
+from mlagents.envs.exception import (
+    UnityEnvironmentException,
+    UnityCommunicationException,
+)
 from mlagents.envs.sampler_class import SamplerManager
 from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
 from mlagents.trainers import Trainer, TrainerMetrics
@@ -302,15 +305,15 @@ def start_learning(
             # Final save Tensorflow model
             if global_step != 0 and self.train_model:
                 self._save_model()
-        except KeyboardInterrupt:
+        except (KeyboardInterrupt, UnityCommunicationException):
             if self.train_model:
                 self._save_model_when_interrupted()
             pass
-        env_manager.close()
         if self.train_model:
             self._write_training_metrics()
             self._export_graph()
         self._write_timing_tree()
+        env_manager.close()
 
     def end_trainer_episodes(
         self, env: BaseUnityEnvironment, lessons_incremented: Dict[str, bool]