Skip to content

Commit 35f4d71

Browse files
Ervin Tanupambhatnagar
authored and committed
[bug-fix] Empty ignored trajectory queues, make sure queues don't overflow (#3451)
1 parent 803e62f commit 35f4d71

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

com.unity.ml-agents/CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,23 @@ All notable changes to this package will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
55
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
66

7+
## [Unreleased]
8+
### Major Changes
9+
- Agent.CollectObservations now takes a VectorSensor argument. It was also overloaded to optionally take an ActionMasker argument. (#3352, #3389)
10+
11+
### Minor Changes
12+
- Monitor.cs was moved to Examples. (#3372)
13+
- Automatic stepping for Academy is now controlled from the AutomaticSteppingEnabled property. (#3376)
14+
- The GetEpisodeCount, GetStepCount, and GetTotalStepCount methods of Academy were changed to the EpisodeCount, StepCount, and TotalStepCount properties, respectively. (#3376)
15+
- Several classes were changed from public to internal visibility. (#3390)
16+
- Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
17+
- A tutorial on adding custom SideChannels was added. (#3391)
18+
- Updated Barracuda to 0.6.0-preview.
19+
20+
### Bugfixes
21+
- Fixed an issue which caused self-play training sessions to consume a lot of memory. (#3451)
22+
23+
724
## [0.14.0-preview] - 2020-02-13
825

926
### Major Changes

ml-agents/mlagents/trainers/ghost/trainer.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(
4040

4141
self.internal_policy_queues: List[AgentManagerQueue[Policy]] = []
4242
self.internal_trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
43+
self.ignored_trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
4344
self.learning_policy_queues: Dict[str, AgentManagerQueue[Policy]] = {}
4445

4546
# assign ghost's stats collection to wrapped trainer's
@@ -134,10 +135,14 @@ def advance(self) -> None:
134135
self.trajectory_queues, self.internal_trajectory_queues
135136
):
136137
try:
137-
t = traj_queue.get_nowait()
138-
# adds to wrapped trainers queue
139-
internal_traj_queue.put(t)
140-
self._process_trajectory(t)
138+
# We grab at most the maximum length of the queue.
139+
# This ensures that even if the queue is being filled faster than it is
140+
# being emptied, the trajectories in the queue are on-policy.
141+
for _ in range(traj_queue.maxlen):
142+
t = traj_queue.get_nowait()
143+
# adds to wrapped trainers queue
144+
internal_traj_queue.put(t)
145+
self._process_trajectory(t)
141146
except AgentManagerQueue.Empty:
142147
pass
143148

@@ -162,6 +167,14 @@ def advance(self) -> None:
162167
self._swap_snapshots()
163168
self.last_swap = self.get_step
164169

170+
# Dump trajectories from non-learning policy
171+
for traj_queue in self.ignored_trajectory_queues:
172+
try:
173+
for _ in range(traj_queue.maxlen):
174+
traj_queue.get_nowait()
175+
except AgentManagerQueue.Empty:
176+
pass
177+
165178
def end_episode(self):
166179
self.trainer.end_episode()
167180

@@ -256,6 +269,8 @@ def subscribe_trajectory_queue(
256269

257270
self.internal_trajectory_queues.append(internal_trajectory_queue)
258271
self.trainer.subscribe_trajectory_queue(internal_trajectory_queue)
272+
else:
273+
self.ignored_trajectory_queues.append(trajectory_queue)
259274

260275

261276
# Taken from https://github.com/Unity-Technologies/ml-agents/pull/1975 and

ml-agents/mlagents/trainers/tests/test_ghost.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ def test_process_trajectory(dummy_config):
152152

153153
# Check that ghost trainer ignored off policy queue
154154
assert trainer.trainer.update_buffer.num_experiences == 15
155+
# Check that it emptied the queue
156+
assert trajectory_queue1.empty()
155157

156158

157159
def test_publish_queue(dummy_config):

0 commit comments

Comments
 (0)