Skip to content

Commit 314c3f3

Browse files
author
Ervin T
authored
Merge pull request #2393 from Unity-Technologies/hotfix-v0.9.0a
- Fix issue with BC Trainer `increment_steps`. - Fix issue with Demonstration Recorder and visual observations (memory leak fix was deleting vis obs too early). - Make Samplers sample from the same random seed every time, so generalization runs are repeatable. - Fix crash when using GAIL, Curiosity, and visual observations together.
2 parents 3ee0963 + de728e5 commit 314c3f3

File tree

13 files changed

+217
-105
lines changed

13 files changed

+217
-105
lines changed

UnitySDK/Assets/ML-Agents/Scripts/Agent.cs

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
namespace MLAgents
99
{
1010
/// <summary>
11-
/// Struct that contains all the information for an Agent, including its
11+
/// Struct that contains all the information for an Agent, including its
1212
/// observations, actions and current status, that is sent to the Brain.
1313
/// </summary>
1414
public struct AgentInfo
@@ -120,15 +120,26 @@ public CommunicatorObjects.AgentInfoProto ToProto()
120120
agentInfoProto.VisualObservations.Add(
121121
ByteString.CopyFrom(obs.EncodeToPNG())
122122
);
123+
}
124+
return agentInfoProto;
125+
}
126+
127+
/// <summary>
128+
/// Remove the visual observations from memory. Call at each timestep
129+
/// to avoid memory leaks.
130+
/// </summary>
131+
public void ClearVisualObs()
132+
{
133+
foreach (Texture2D obs in visualObservations)
134+
{
123135
Object.Destroy(obs);
124136
}
125137
visualObservations.Clear();
126-
return agentInfoProto;
127138
}
128139
}
129140

130141
/// <summary>
131-
/// Struct that contains the action information sent from the Brain to the
142+
/// Struct that contains the action information sent from the Brain to the
132143
/// Agent.
133144
/// </summary>
134145
public struct AgentAction
@@ -141,7 +152,7 @@ public struct AgentAction
141152
}
142153

143154
/// <summary>
144-
/// Struct that contains all the Agent-specific parameters provided in the
155+
/// Struct that contains all the Agent-specific parameters provided in the
145156
/// Editor. This excludes the Brain linked to the Agent since it can be
146157
/// modified programmatically.
147158
/// </summary>
@@ -153,7 +164,7 @@ public class AgentParameters
153164
/// observations.
154165
/// </summary>
155166
public List<Camera> agentCameras = new List<Camera>();
156-
167+
157168
/// <summary>
158169
/// The list of the RenderTextures the agent uses for visual
159170
/// observations.
@@ -162,7 +173,7 @@ public class AgentParameters
162173

163174

164175
/// <summary>
165-
/// The maximum number of steps the agent takes before being done.
176+
/// The maximum number of steps the agent takes before being done.
166177
/// </summary>
167178
/// <remarks>
168179
/// If set to 0, the agent can only be set to done programmatically (or
@@ -184,7 +195,7 @@ public class AgentParameters
184195
public bool resetOnDone = true;
185196

186197
/// <summary>
187-
/// Whether to enable On Demand Decisions or make a decision at
198+
/// Whether to enable On Demand Decisions or make a decision at
188199
/// every step.
189200
/// </summary>
190201
public bool onDemandDecision;
@@ -199,8 +210,8 @@ public class AgentParameters
199210

200211
/// <summary>
201212
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
202-
/// an Agent. An agent produces observations and takes actions in the
203-
/// environment. Observations are determined by the cameras attached
213+
/// an Agent. An agent produces observations and takes actions in the
214+
/// environment. Observations are determined by the cameras attached
204215
/// to the agent in addition to the vector observations implemented by the
205216
/// user in <see cref="CollectObservations"/>. On the other hand, actions
206217
/// are determined by decisions produced by a linked Brain. Currently, this
@@ -213,34 +224,34 @@ public class AgentParameters
213224
/// however, an agent need not send its observation at every step since very
214225
/// little may have changed between successive steps. Currently, how often an
215226
/// agent updates its brain with a fresh observation is determined by the
216-
/// Academy.
217-
///
218-
/// At any step, an agent may be considered <see cref="done"/>.
227+
/// Academy.
228+
///
229+
/// At any step, an agent may be considered <see cref="done"/>.
219230
/// This could occur due to a variety of reasons:
220231
/// - The agent reached an end state within its environment.
221232
/// - The agent reached the maximum # of steps (i.e. timed out).
222233
/// - The academy reached the maximum # of steps (forced agent to be done).
223-
///
234+
///
224235
/// Here, an agent reaches an end state if it completes its task successfully
225236
/// or somehow fails along the way. In the case where an agent is done before
226237
/// the academy, it either resets and restarts, or just lingers until the
227238
/// academy is done.
228-
///
239+
///
229240
/// An important note regarding steps and episodes is due. Here, an agent step
230241
/// corresponds to an academy step, which also corresponds to Unity
231242
/// environment step (i.e. each FixedUpdate call). This is not the case for
232-
/// episodes. The academy controls the global episode count and each agent
243+
/// episodes. The academy controls the global episode count and each agent
233244
/// controls its own local episode count and can reset and start a new local
234245
/// episode independently (based on its own experience). Thus an academy
235246
/// (global) episode can be viewed as the upper-bound on an agents episode
236247
/// length and that within a single global episode, an agent may have completed
237248
/// multiple local episodes. Consequently, if an agent max step is
238249
/// set to a value larger than the academy max steps value, then the academy
239250
/// value takes precedence (since the agent max step will never be reached).
240-
///
251+
///
241252
/// Lastly, note that at any step the brain linked to the agent is allowed to
242253
/// change programmatically with <see cref="GiveBrain"/>.
243-
///
254+
///
244255
/// Implementation-wise, it is required that this class is extended and the
245256
/// virtual methods overridden. For sample implementations of agent behavior,
246257
/// see the Examples/ directory within this Unity project.
@@ -252,7 +263,7 @@ public abstract class Agent : MonoBehaviour
252263
{
253264
/// <summary>
254265
/// The Brain attached to this agent. A brain can be attached either
255-
/// directly from the Editor through AgentEditor or
266+
/// directly from the Editor through AgentEditor or
256267
/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
257268
/// to not have a brain, as long as no decision is requested.
258269
/// </summary>
@@ -523,7 +534,7 @@ void ResetData()
523534
actionMasker = new ActionMasker(param);
524535
// If we haven't initialized vectorActions, initialize to 0. This should only
525536
// happen during the creation of the Agent. In subsequent episodes, vectorAction
526-
// should stay the previous action before the Done(), so that it is properly recorded.
537+
// should stay the previous action before the Done(), so that it is properly recorded.
527538
if (action.vectorActions == null)
528539
{
529540
if (param.vectorActionSpaceType == SpaceType.continuous)
@@ -598,9 +609,9 @@ void SendInfoToBrain()
598609
brain.brainParameters.vectorObservationSize,
599610
info.vectorObservation.Count));
600611
}
601-
612+
602613
Utilities.ShiftLeft(info.stackedVectorObservation, param.vectorObservationSize);
603-
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation,
614+
Utilities.ReplaceRange(info.stackedVectorObservation, info.vectorObservation,
604615
info.stackedVectorObservation.Count - info.vectorObservation.Count);
605616

606617
info.visualObservations.Clear();
@@ -624,7 +635,7 @@ void SendInfoToBrain()
624635
param.cameraResolutions[i].height);
625636
info.visualObservations.Add(obsTexture);
626637
}
627-
638+
628639
//Then add all renderTextures
629640
var camCount = agentParameters.agentCameras.Count;
630641
for (int i = 0; i < agentParameters.agentRenderTextures.Count; i++)
@@ -653,13 +664,13 @@ void SendInfoToBrain()
653664

654665
/// <summary>
655666
/// Collects the (vector, visual, text) observations of the agent.
656-
/// The agent observation describes the current environment from the
667+
/// The agent observation describes the current environment from the
657668
/// perspective of the agent.
658669
/// </summary>
659670
/// <remarks>
660671
/// Simply, an agents observation is any environment information that helps
661672
/// the Agent achieve its goal. For example, for a fighting Agent, its
662-
/// observation could include distances to friends or enemies, or the
673+
/// observation could include distances to friends or enemies, or the
663674
/// current level of ammunition at its disposal.
664675
/// Recall that an Agent may attach vector, visual or textual observations.
665676
/// Vector observations are added by calling the provided helper methods:
@@ -678,7 +689,7 @@ void SendInfoToBrain()
678689
/// needs to match the vectorObservationSize attribute of the linked Brain.
679690
/// Visual observations are implicitly added from the cameras attached to
680691
/// the Agent.
681-
/// Lastly, textual observations are added using
692+
/// Lastly, textual observations are added using
682693
/// <see cref="SetTextObs(string)"/>.
683694
/// </remarks>
684695
public virtual void CollectObservations()
@@ -861,7 +872,7 @@ public virtual void AgentAction(float[] vectorAction, string textAction, Communi
861872
}
862873

863874
/// <summary>
864-
/// Specifies the agent behavior when done and
875+
/// Specifies the agent behavior when done and
865876
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
866877
/// used to remove the agent from the scene.
867878
/// </summary>
@@ -906,12 +917,12 @@ public void UpdateMemoriesAction(List<float> memories)
906917
{
907918
action.memories = memories;
908919
}
909-
920+
910921
public void AppendMemoriesAction(List<float> memories)
911922
{
912923
action.memories.AddRange(memories);
913924
}
914-
925+
915926
public List<float> GetMemoriesAction()
916927
{
917928
return action.memories;
@@ -966,9 +977,9 @@ protected float ScaleAction(float rawAction, float min, float max)
966977
/// <summary>
967978
/// Sets the status of the agent.
968979
/// </summary>
969-
/// <param name="academyMaxStep">If set to <c>true</c>
980+
/// <param name="academyMaxStep">If set to <c>true</c>
970981
/// The agent must set maxStepReached.</param>
971-
/// <param name="academyDone">If set to <c>true</c>
982+
/// <param name="academyDone">If set to <c>true</c>
972983
/// The agent must set done.</param>
973984
/// <param name="academyStepCounter">Number of current steps in episode</param>
974985
void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
@@ -984,7 +995,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
984995
maxStepReached = true;
985996
}
986997

987-
// If the Academy needs to reset, the agent should reset
998+
// If the Academy needs to reset, the agent should reset
988999
// even if it was reset recently.
9891000
if (academyDone)
9901001
{
@@ -996,7 +1007,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
9961007
/// Signals the agent that it must reset if its done flag is set to true.
9971008
void ResetIfDone()
9981009
{
999-
// If an agent is done, then it will also
1010+
// If an agent is done, then it will also
10001011
// request for a decision and an action
10011012
if (IsDone())
10021013
{
@@ -1126,14 +1137,14 @@ public static Texture2D ObservationToTexture(Camera obsCamera, int width, int he
11261137
obsCamera.Render();
11271138

11281139
texture2D.ReadPixels(new Rect(0, 0, texture2D.width, texture2D.height), 0, 0);
1129-
1140+
11301141
obsCamera.targetTexture = prevCameraRT;
11311142
obsCamera.rect = oldRec;
11321143
RenderTexture.active = prevActiveRT;
11331144
RenderTexture.ReleaseTemporary(tempRT);
11341145
return texture2D;
11351146
}
1136-
1147+
11371148
/// <summary>
11381149
/// Converts a RenderTexture and corresponding resolution to a 2D texture.
11391150
/// </summary>
@@ -1150,7 +1161,7 @@ public static Texture2D ObservationToTexture(RenderTexture obsTexture, int width
11501161
{
11511162
texture2D.Resize(width, height);
11521163
}
1153-
1164+
11541165
if(width != obsTexture.width || height != obsTexture.height)
11551166
{
11561167
throw new UnityAgentsException(string.Format(
@@ -1175,5 +1186,5 @@ public void SetCustomObservation(CustomObservation customObservation)
11751186
{
11761187
info.customObservation = customObservation;
11771188
}
1178-
}
1189+
}
11791190
}

UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66
namespace MLAgents
77
{
88
/// <summary>
9-
/// The batcher is an RL specific class that makes sure that the information each object in
10-
/// Unity (Academy and Brains) wants to send to External is appropriately batched together
9+
/// The batcher is an RL specific class that makes sure that the information each object in
10+
/// Unity (Academy and Brains) wants to send to External is appropriately batched together
1111
/// and sent only when necessary.
12-
///
12+
///
1313
/// The Batcher will only send a Message to the Communicator when either :
1414
/// 1 - The academy is done
1515
/// 2 - At least one brain has data to send
16-
///
17-
/// At each step, the batcher will keep track of the brains that queried the batcher for that
16+
///
17+
/// At each step, the batcher will keep track of the brains that queried the batcher for that
1818
/// step. The batcher can only send the batched data when all the Brains have queried the
1919
/// Batcher.
2020
/// </summary>
@@ -67,7 +67,7 @@ public Batcher(Communicator communicator)
6767
}
6868

6969
/// <summary>
70-
/// Sends the academy parameters through the Communicator.
70+
/// Sends the academy parameters through the Communicator.
7171
/// Is used by the academy to send the AcademyParameters to the communicator.
7272
/// </summary>
7373
/// <returns>The External Initialization Parameters received.</returns>
@@ -104,7 +104,7 @@ public CommunicatorObjects.UnityRLInitializationInput SendAcademyParameters(
104104
/// Registers the done flag of the academy to the next output to be sent
105105
/// to the communicator.
106106
/// </summary>
107-
/// <param name="done">If set to <c>true</c>
107+
/// <param name="done">If set to <c>true</c>
108108
/// The academy done state will be sent to External at the next Exchange.</param>
109109
public void RegisterAcademyDoneFlag(bool done)
110110
{
@@ -164,7 +164,7 @@ public void SubscribeBrain(string brainKey)
164164

165165
/// <summary>
166166
/// Sends the brain info. If at least one brain has an agent in need of
167-
/// a decision or if the academy is done, the data is sent via
167+
/// a decision or if the academy is done, the data is sent via
168168
/// Communicator. Else, a new step is realized. The data can only be
169169
/// sent once all the brains that subscribed to the batcher have tried
170170
/// to send information.
@@ -198,6 +198,9 @@ public void SendBrainInfo(
198198
{
199199
CommunicatorObjects.AgentInfoProto agentInfoProto = agentInfo[agent].ToProto();
200200
m_currentUnityRLOutput.AgentInfos[brainKey].Value.Add(agentInfoProto);
201+
// Avoid visual obs memory leak. This should be called AFTER we are done with the visual obs.
202+
// e.g. after recording them to demo and using them for inference.
203+
agentInfo[agent].ClearVisualObs();
201204
}
202205

203206
m_hasData[brainKey] = true;

gym-unity/setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44

55
setup(
66
name="gym_unity",
7-
version="0.4.3",
7+
version="0.4.3a",
88
description="Unity Machine Learning Agents Gym Interface",
99
license="Apache License 2.0",
1010
author="Unity Technologies",
1111
author_email="[email protected]",
1212
url="https://github.com/Unity-Technologies/ml-agents",
1313
packages=find_packages(),
14-
install_requires=["gym", "mlagents_envs==0.9.0"],
14+
install_requires=["gym", "mlagents_envs==0.9.0a"],
1515
)

0 commit comments

Comments
 (0)