[bug-fix] Empty ignored trajectory queues, make sure queues don't overflow (#3451)

Ervin T · anupambhatnagar · commit 35f4d71e0824 · 2020-02-25T16:30:00.000-08:00
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -4,6 +4,23 @@ All notable changes to this package will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+### Major Changes
+ - Agent.CollectObservations now takes a VectorSensor argument. It was also overloaded to optionally take an ActionMasker argument. (#3352, #3389)
+
+### Minor Changes
+ - Monitor.cs was moved to Examples. (#3372)
+ - Automatic stepping for Academy is now controlled from the AutomaticSteppingEnabled property. (#3376)
+ - The GetEpisodeCount, GetStepCount and GetTotalStepCount methods of Academy were changed to the EpisodeCount, StepCount and TotalStepCount properties, respectively. (#3376)
+ - Several classes were changed from public to internal visibility. (#3390)
+ - Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
+ - A tutorial on adding custom SideChannels was added. (#3391)
+ - Barracuda was updated to 0.6.0-preview.
+
+### Bugfixes
+- Fixed an issue which caused self-play training sessions to consume a lot of memory. (#3451)
+
+
 ## [0.14.0-preview] - 2020-02-13
 
 ### Major Changes
diff --git a/ml-agents/mlagents/trainers/ghost/trainer.py b/ml-agents/mlagents/trainers/ghost/trainer.py
@@ -40,6 +40,7 @@ def __init__(
 
         self.internal_policy_queues: List[AgentManagerQueue[Policy]] = []
         self.internal_trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
+        self.ignored_trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
         self.learning_policy_queues: Dict[str, AgentManagerQueue[Policy]] = {}
 
         # assign ghost's stats collection to wrapped trainer's
@@ -134,10 +135,14 @@ def advance(self) -> None:
             self.trajectory_queues, self.internal_trajectory_queues
         ):
             try:
-                t = traj_queue.get_nowait()
-                # adds to wrapped trainers queue
-                internal_traj_queue.put(t)
-                self._process_trajectory(t)
+                # We grab at most the maximum length of the queue.
+                # This ensures that even if the queue is being filled faster than it is
+                # being emptied, the trajectories in the queue are on-policy.
+                for _ in range(traj_queue.maxlen):
+                    t = traj_queue.get_nowait()
+                    # adds to wrapped trainers queue
+                    internal_traj_queue.put(t)
+                    self._process_trajectory(t)
             except AgentManagerQueue.Empty:
                 pass
 
@@ -162,6 +167,14 @@ def advance(self) -> None:
             self._swap_snapshots()
             self.last_swap = self.get_step
 
+        # Dump trajectories from non-learning policy
+        for traj_queue in self.ignored_trajectory_queues:
+            try:
+                for _ in range(traj_queue.maxlen):
+                    traj_queue.get_nowait()
+            except AgentManagerQueue.Empty:
+                pass
+
     def end_episode(self):
         self.trainer.end_episode()
 
@@ -256,6 +269,8 @@ def subscribe_trajectory_queue(
 
             self.internal_trajectory_queues.append(internal_trajectory_queue)
             self.trainer.subscribe_trajectory_queue(internal_trajectory_queue)
+        else:
+            self.ignored_trajectory_queues.append(trajectory_queue)
 
 
 # Taken from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
diff --git a/ml-agents/mlagents/trainers/tests/test_ghost.py b/ml-agents/mlagents/trainers/tests/test_ghost.py
@@ -152,6 +152,8 @@ def test_process_trajectory(dummy_config):
 
     # Check that ghost trainer ignored off policy queue
     assert trainer.trainer.update_buffer.num_experiences == 15
+    # Check that it emptied the queue
+    assert trajectory_queue1.empty()
 
 
 def test_publish_queue(dummy_config):