Skip to content

Commit 314c3f3

Browse files
author
Ervin T
authored
Merge pull request #2393 from Unity-Technologies/hotfix-v0.9.0a
- Fix issue with BC Trainer `increment_steps`. - Fix issue with Demonstration Recorder and visual observations (memory leak fix was deleting vis obs too early). - Make Samplers sample from the same random seed every time, so generalization runs are repeatable. - Fix crash when using GAIL, Curiosity, and visual observations together.
2 parents 3ee0963 + de728e5 commit 314c3f3

File tree

13 files changed

+217
-105
lines changed

13 files changed

+217
-105
lines changed

UnitySDK/Assets/ML-Agents/Scripts/Agent.cs

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
namespace MLAgents
99
{
1010
/// <summary>
11-
/// Struct that contains all the information for an Agent, including its
11+
/// Struct that contains all the information for an Agent, including its
1212
/// observations, actions and current status, that is sent to the Brain.
1313
/// </summary>
1414
public struct AgentInfo
@@ -120,15 +120,26 @@ public CommunicatorObjects.AgentInfoProto ToProto()
120120
agentInfoProto.VisualObservations.Add(
121121
ByteString.CopyFrom(obs.EncodeToPNG())
122122
);
123+
}
124+
return agentInfoProto;
125+
}
126+
127+
/// <summary>
128+
/// Remove the visual observations from memory. Call at each timestep
129+
/// to avoid memory leaks.
130+
/// </summary>
131+
public void ClearVisualObs()
132+
{
133+
foreach (Texture2D obs in visualObservations)
134+
{
123135
Object.Destroy(obs);
124136
}
125137
visualObservations.Clear();
126-
return agentInfoProto;
127138
}
128139
}
129140

130141
/// <summary>
131-
/// Struct that contains the action information sent from the Brain to the
142+
/// Struct that contains the action information sent from the Brain to the
132143
/// Agent.
133144
/// </summary>
134145
public struct AgentAction
@@ -141,7 +152,7 @@ public struct AgentAction
141152
}
142153

143154
/// <summary>
144-
/// Struct that contains all the Agent-specific parameters provided in the
155+
/// Struct that contains all the Agent-specific parameters provided in the
145156
/// Editor. This excludes the Brain linked to the Agent since it can be
146157
/// modified programmatically.
147158
/// </summary>
@@ -153,7 +164,7 @@ public class AgentParameters
153164
/// observations.
154165
/// </summary>
155166
public List<Camera> agentCameras = new List<Camera>();
156-
167+
157168
/// <summary>
158169
/// The list of the RenderTextures the agent uses for visual
159170
/// observations.
@@ -162,7 +173,7 @@ public class AgentParameters
162173

163174

164175
/// <summary>
165-
/// The maximum number of steps the agent takes before being done.
176+
/// The maximum number of steps the agent takes before being done.
166177
/// </summary>
167178
/// <remarks>
168179
/// If set to 0, the agent can only be set to done programmatically (or
@@ -184,7 +195,7 @@ public class AgentParameters
184195
public bool resetOnDone = true;
185196

186197
/// <summary>
187-
/// Whether to enable On Demand Decisions or make a decision at
198+
/// Whether to enable On Demand Decisions or make a decision at
188199
/// every step.
189200
/// </summary>
190201
public bool onDemandDecision;
@@ -199,8 +210,8 @@ public class AgentParameters
199210

200211
/// <summary>
201212
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
202-
/// an Agent. An agent produces observations and takes actions in the
203-
/// environment. Observations are determined by the cameras attached
213+
/// an Agent. An agent produces observations and takes actions in the
214+
/// environment. Observations are determined by the cameras attached
204215
/// to the agent in addition to the vector observations implemented by the
205216
/// user in <see cref="CollectObservations"/>. On the other hand, actions
206217
/// are determined by decisions produced by a linked Brain. Currently, this
@@ -213,34 +224,34 @@ public class AgentParameters
213224
/// however, an agent need not send its observation at every step since very
214225
/// little may have changed between successive steps. Currently, how often an
215226
/// agent updates its brain with a fresh observation is determined by the
216-
/// Academy.
217-
///
218-
/// At any step, an agent may be considered <see cref="done"/>.
227+
/// Academy.
228+
///
229+
/// At any step, an agent may be considered <see cref="done"/>.
219230
/// This could occur due to a variety of reasons:
220231
/// - The agent reached an end state within its environment.
221232
/// - The agent reached the maximum # of steps (i.e. timed out).
222233
/// - The academy reached the maximum # of steps (forced agent to be done).
223-
///
234+
///
224235
/// Here, an agent reaches an end state if it completes its task successfully
225236
/// or somehow fails along the way. In the case where an agent is done before
226237
/// the academy, it either resets and restarts, or just lingers until the
227238
/// academy is done.
228-
///
239+
///
229240
/// An important note regarding steps and episodes is due. Here, an agent step
230241
/// corresponds to an academy step, which also corresponds to Unity
231242
/// environment step (i.e. each FixedUpdate call). This is not the case for
232-
/// episodes. The academy controls the global episode count and each agent
243+
/// episodes. The academy controls the global episode count and each agent
233244
/// controls its own local episode count and can reset and start a new local
234245
/// episode independently (based on its own experience). Thus an academy
235246
/// (global) episode can be viewed as the upper-bound on an agents episode
236247
/// length and that within a single global episode, an agent may have completed
237248
/// multiple local episodes. Consequently, if an agent max step is
238249
/// set to a value larger than the academy max steps value, then the academy
239250
/// value takes precedence (since the agent max step will never be reached).
240-
///
251+
///
241252
/// Lastly, note that at any step the brain linked to the agent is allowed to
242253
/// change programmatically with <see cref="GiveBrain"/>.
243-
///
254+
///
244255
/// Implementation-wise, it is required that this class is extended and the
245256
/// virtual methods overridden. For sample implementations of agent behavior,
246257
/// see the Examples/ directory within this Unity project.
@@ -252,7 +263,7 @@ public abstract class Agent : MonoBehaviour
252263
{
253264
/// <summary>
254265
/// The Brain attached to this agent. A brain can be attached either
255-
/// directly from the Editor through AgentEditor or
266+
/// directly from the Editor through AgentEditor or
256267
/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
257268
/// to not have a brain, as long as no decision is requested.
258269
/// </summary>
@@ -523,7 +534,7 @@ void ResetData()
523534
actionMasker = new ActionMasker(param);
524535
// If we haven't initialized vectorActions, initialize to 0. This should only
525536
// happen during the creation of the Agent. In subsequent episodes, vectorAction
526-
// should stay the previous action before the Done(), so that it is properly recorded.
537+
// should stay the previous action before the Done(), so that it is properly recorded.
527538
if (action.vectorActions == null)
528539
{
529540
if (param.vectorActionSpaceType == SpaceType.continuous)
@@ -598,9 +609,9 @@ void SendInfoToBrain()
598609
brain.brainParameters.vectorObservationSize,
599610
info.vectorObservation.Count));
600611
}
601-
612+
602613
Utilities.ShiftLeft(info.stackedVectorObservation, param.vectorObservationSize);
603-
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation,
614+
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation,
604615
info.stackedVectorObservation.Count - info.vectorObservation.Count);
605616

606617
info.visualObservations.Clear();
@@ -624,7 +635,7 @@ void SendInfoToBrain()
624635
param.cameraResolutions[i].height);
625636
info.visualObservations.Add(obsTexture);
626637
}
627-
638+
628639
//Then add all renderTextures
629640
var camCount = agentParameters.agentCameras.Count;
630641
for (int i = 0; i < agentParameters.agentRenderTextures.Count; i++)
@@ -653,13 +664,13 @@ void SendInfoToBrain()
653664

654665
/// <summary>
655666
/// Collects the (vector, visual, text) observations of the agent.
656-
/// The agent observation describes the current environment from the
667+
/// The agent observation describes the current environment from the
657668
/// perspective of the agent.
658669
/// </summary>
659670
/// <remarks>
660671
/// Simply, an agents observation is any environment information that helps
661672
/// the Agent achieve its goal. For example, for a fighting Agent, its
662-
/// observation could include distances to friends or enemies, or the
673+
/// observation could include distances to friends or enemies, or the
663674
/// current level of ammunition at its disposal.
664675
/// Recall that an Agent may attach vector, visual or textual observations.
665676
/// Vector observations are added by calling the provided helper methods:
@@ -678,7 +689,7 @@ void SendInfoToBrain()
678689
/// needs to match the vectorObservationSize attribute of the linked Brain.
679690
/// Visual observations are implicitly added from the cameras attached to
680691
/// the Agent.
681-
/// Lastly, textual observations are added using
692+
/// Lastly, textual observations are added using
682693
/// <see cref="SetTextObs(string)"/>.
683694
/// </remarks>
684695
public virtual void CollectObservations()
@@ -861,7 +872,7 @@ public virtual void AgentAction(float[] vectorAction, string textAction, Communi
861872
}
862873

863874
/// <summary>
864-
/// Specifies the agent behavior when done and
875+
/// Specifies the agent behavior when done and
865876
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
866877
/// used to remove the agent from the scene.
867878
/// </summary>
@@ -906,12 +917,12 @@ public void UpdateMemoriesAction(List<float> memories)
906917
{
907918
action.memories = memories;
908919
}
909-
920+
910921
public void AppendMemoriesAction(List<float> memories)
911922
{
912923
action.memories.AddRange(memories);
913924
}
914-
925+
915926
public List<float> GetMemoriesAction()
916927
{
917928
return action.memories;
@@ -966,9 +977,9 @@ protected float ScaleAction(float rawAction, float min, float max)
966977
/// <summary>
967978
/// Sets the status of the agent.
968979
/// </summary>
969-
/// <param name="academyMaxStep">If set to <c>true</c>
980+
/// <param name="academyMaxStep">If set to <c>true</c>
970981
/// The agent must set maxStepReached.</param>
971-
/// <param name="academyDone">If set to <c>true</c>
982+
/// <param name="academyDone">If set to <c>true</c>
972983
/// The agent must set done.</param>
973984
/// <param name="academyStepCounter">Number of current steps in episode</param>
974985
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
@@ -984,7 +995,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
984995
maxStepReached = true;
985996
}
986997

987-
// If the Academy needs to reset, the agent should reset
998+
// If the Academy needs to reset, the agent should reset
988999
// even if it was reset recently.
9891000
if (academyDone)
9901001
{
@@ -996,7 +1007,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
9961007
/// Signals the agent that it must reset if its done flag is set to true.
9971008
void ResetIfDone()
9981009
{
999-
// If an agent is done, then it will also
1010+
// If an agent is done, then it will also
10001011
// request for a decision and an action
10011012
if (IsDone())
10021013
{
@@ -1126,14 +1137,14 @@ public static Texture2D ObservationToTexture(Camera obsCamera, int width, int he
11261137
obsCamera.Render();
11271138

11281139
texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0);
1129-
1140+
11301141
obsCamera.targetTexture = prevCameraRT;
11311142
obsCamera.rect = oldRec;
11321143
RenderTexture.active = prevActiveRT;
11331144
RenderTexture.ReleaseTemporary(tempRT);
11341145
return texture2D;
11351146
}
1136-
1147+
11371148
/// <summary>
11381149
/// Converts a RenderTexture and corresponding resolution to a 2D texture.
11391150
/// </summary>
@@ -1150,7 +1161,7 @@ public static Texture2D ObservationToTexture(RenderTexture obsTexture, int width
11501161
{
11511162
texture2D.Resize(width, height);
11521163
}
1153-
1164+
11541165
if(width != obsTexture.width || height != obsTexture.height)
11551166
{
11561167
throw new UnityAgentsException(string.Format(
@@ -1175,5 +1186,5 @@ public void SetCustomObservation(CustomObservation customObservation)
11751186
{
11761187
info.customObservation = customObservation;
11771188
}
1178-
}
1189+
}
11791190
}

UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66
namespace MLAgents
77
{
88
/// <summary>
9-
/// The batcher is an RL specific class that makes sure that the information each object in
10-
/// Unity (Academy and Brains) wants to send to External is appropriately batched together
9+
/// The batcher is an RL specific class that makes sure that the information each object in
10+
/// Unity (Academy and Brains) wants to send to External is appropriately batched together
1111
/// and sent only when necessary.
12-
///
12+
///
1313
/// The Batcher will only send a Message to the Communicator when either :
1414
/// 1 - The academy is done
1515
/// 2 - At least one brain has data to send
16-
///
17-
/// At each step, the batcher will keep track of the brains that queried the batcher for that
16+
///
17+
/// At each step, the batcher will keep track of the brains that queried the batcher for that
1818
/// step. The batcher can only send the batched data when all the Brains have queried the
1919
/// Batcher.
2020
/// </summary>
@@ -67,7 +67,7 @@ public Batcher(Communicator communicator)
6767
}
6868

6969
/// <summary>
70-
/// Sends the academy parameters through the Communicator.
70+
/// Sends the academy parameters through the Communicator.
7171
/// Is used by the academy to send the AcademyParameters to the communicator.
7272
/// </summary>
7373
/// <returns>The External Initialization Parameters received.</returns>
@@ -104,7 +104,7 @@ public CommunicatorObjects.UnityRLInitializationInput SendAcademyParameters(
104104
/// Registers the done flag of the academy to the next output to be sent
105105
/// to the communicator.
106106
/// </summary>
107-
/// <param name="done">If set to <c>true</c>
107+
/// <param name="done">If set to <c>true</c>
108108
/// The academy done state will be sent to External at the next Exchange.</param>
109109
public void RegisterAcademyDoneFlag(bool done)
110110
{
@@ -164,7 +164,7 @@ public void SubscribeBrain(string brainKey)
164164

165165
/// <summary>
166166
/// Sends the brain info. If at least one brain has an agent in need of
167-
/// a decision or if the academy is done, the data is sent via
167+
/// a decision or if the academy is done, the data is sent via
168168
/// Communicator. Else, a new step is realized. The data can only be
169169
/// sent once all the brains that subscribed to the batcher have tried
170170
/// to send information.
@@ -198,6 +198,9 @@ public void SendBrainInfo(
198198
{
199199
CommunicatorObjects.AgentInfoProto agentInfoProto = agentInfo[agent].ToProto();
200200
m_currentUnityRLOutput.AgentInfos[brainKey].Value.Add(agentInfoProto);
201+
// Avoid visual obs memory leak. This should be called AFTER we are done with the visual obs.
202+
// e.g. after recording them to demo and using them for inference.
203+
agentInfo[agent].ClearVisualObs();
201204
}
202205

203206
m_hasData[brainKey] = true;

gym-unity/setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44

55
setup(
66
name="gym_unity",
7-
version="0.4.3",
7+
version="0.4.3a",
88
description="Unity Machine Learning Agents Gym Interface",
99
license="Apache License 2.0",
1010
author="Unity Technologies",
1111
author_email="[email protected]",
1212
url="https://github.com/Unity-Technologies/ml-agents",
1313
packages=find_packages(),
14-
install_requires=["gym", "mlagents_envs==0.9.0"],
14+
install_requires=["gym", "mlagents_envs==0.9.0a"],
1515
)

0 commit comments

Comments
 (0)