8
8
namespace MLAgents
9
9
{
10
10
/// <summary>
11
- /// Struct that contains all the information for an Agent, including its
11
+ /// Struct that contains all the information for an Agent, including its
12
12
/// observations, actions and current status, that is sent to the Brain.
13
13
/// </summary>
14
14
public struct AgentInfo
@@ -120,15 +120,26 @@ public CommunicatorObjects.AgentInfoProto ToProto()
120
120
agentInfoProto . VisualObservations . Add (
121
121
ByteString . CopyFrom ( obs . EncodeToPNG ( ) )
122
122
) ;
123
+ }
124
+ return agentInfoProto ;
125
+ }
126
+
127
+ /// <summary>
128
+ /// Remove the visual observations from memory. Call at each timestep
129
+ /// to avoid memory leaks.
130
+ /// </summary>
131
+ public void ClearVisualObs ( )
132
+ {
133
+ foreach ( Texture2D obs in visualObservations )
134
+ {
123
135
Object . Destroy ( obs ) ;
124
136
}
125
137
visualObservations . Clear ( ) ;
126
- return agentInfoProto ;
127
138
}
128
139
}
129
140
130
141
/// <summary>
131
- /// Struct that contains the action information sent from the Brain to the
142
+ /// Struct that contains the action information sent from the Brain to the
132
143
/// Agent.
133
144
/// </summary>
134
145
public struct AgentAction
@@ -141,7 +152,7 @@ public struct AgentAction
141
152
}
142
153
143
154
/// <summary>
144
- /// Struct that contains all the Agent-specific parameters provided in the
155
+ /// Struct that contains all the Agent-specific parameters provided in the
145
156
/// Editor. This excludes the Brain linked to the Agent since it can be
146
157
/// modified programmatically.
147
158
/// </summary>
@@ -153,7 +164,7 @@ public class AgentParameters
153
164
/// observations.
154
165
/// </summary>
155
166
public List < Camera > agentCameras = new List < Camera > ( ) ;
156
-
167
+
157
168
/// <summary>
158
169
/// The list of the RenderTextures the agent uses for visual
159
170
/// observations.
@@ -162,7 +173,7 @@ public class AgentParameters
162
173
163
174
164
175
/// <summary>
165
- /// The maximum number of steps the agent takes before being done.
176
+ /// The maximum number of steps the agent takes before being done.
166
177
/// </summary>
167
178
/// <remarks>
168
179
/// If set to 0, the agent can only be set to done programmatically (or
@@ -184,7 +195,7 @@ public class AgentParameters
184
195
public bool resetOnDone = true ;
185
196
186
197
/// <summary>
187
- /// Whether to enable On Demand Decisions or make a decision at
198
+ /// Whether to enable On Demand Decisions or make a decision at
188
199
/// every step.
189
200
/// </summary>
190
201
public bool onDemandDecision ;
@@ -199,8 +210,8 @@ public class AgentParameters
199
210
200
211
/// <summary>
201
212
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
202
- /// an Agent. An agent produces observations and takes actions in the
203
- /// environment. Observations are determined by the cameras attached
213
+ /// an Agent. An agent produces observations and takes actions in the
214
+ /// environment. Observations are determined by the cameras attached
204
215
/// to the agent in addition to the vector observations implemented by the
205
216
/// user in <see cref="CollectObservations"/>. On the other hand, actions
206
217
/// are determined by decisions produced by a linked Brain. Currently, this
@@ -213,34 +224,34 @@ public class AgentParameters
213
224
/// however, an agent need not send its observation at every step since very
214
225
/// little may have changed between successive steps. Currently, how often an
215
226
/// agent updates its brain with a fresh observation is determined by the
216
- /// Academy.
217
- ///
218
- /// At any step, an agent may be considered <see cref="done"/>.
227
+ /// Academy.
228
+ ///
229
+ /// At any step, an agent may be considered <see cref="done"/>.
219
230
/// This could occur due to a variety of reasons:
220
231
/// - The agent reached an end state within its environment.
221
232
/// - The agent reached the maximum # of steps (i.e. timed out).
222
233
/// - The academy reached the maximum # of steps (forced agent to be done).
223
- ///
234
+ ///
224
235
/// Here, an agent reaches an end state if it completes its task successfully
225
236
/// or somehow fails along the way. In the case where an agent is done before
226
237
/// the academy, it either resets and restarts, or just lingers until the
227
238
/// academy is done.
228
- ///
239
+ ///
229
240
/// An important note regarding steps and episodes is due. Here, an agent step
230
241
/// corresponds to an academy step, which also corresponds to Unity
231
242
/// environment step (i.e. each FixedUpdate call). This is not the case for
232
- /// episodes. The academy controls the global episode count and each agent
243
+ /// episodes. The academy controls the global episode count and each agent
233
244
/// controls its own local episode count and can reset and start a new local
234
245
/// episode independently (based on its own experience). Thus an academy
235
246
/// (global) episode can be viewed as the upper-bound on an agent's episode
236
247
/// length and that within a single global episode, an agent may have completed
237
248
/// multiple local episodes. Consequently, if an agent max step is
238
249
/// set to a value larger than the academy max steps value, then the academy
239
250
/// value takes precedence (since the agent max step will never be reached).
240
- ///
251
+ ///
241
252
/// Lastly, note that at any step the brain linked to the agent is allowed to
242
253
/// change programmatically with <see cref="GiveBrain"/>.
243
- ///
254
+ ///
244
255
/// Implementation-wise, it is required that this class is extended and the
245
256
/// virtual methods overridden. For sample implementations of agent behavior,
246
257
/// see the Examples/ directory within this Unity project.
@@ -252,7 +263,7 @@ public abstract class Agent : MonoBehaviour
252
263
{
253
264
/// <summary>
254
265
/// The Brain attached to this agent. A brain can be attached either
255
- /// directly from the Editor through AgentEditor or
266
+ /// directly from the Editor through AgentEditor or
256
267
/// programmatically through <see cref="GiveBrain"/>. It is OK for an agent
257
268
/// to not have a brain, as long as no decision is requested.
258
269
/// </summary>
@@ -523,7 +534,7 @@ void ResetData()
523
534
actionMasker = new ActionMasker ( param ) ;
524
535
// If we haven't initialized vectorActions, initialize to 0. This should only
525
536
// happen during the creation of the Agent. In subsequent episodes, vectorAction
526
- // should stay the previous action before the Done(), so that it is properly recorded.
537
+ // should stay the previous action before the Done(), so that it is properly recorded.
527
538
if ( action . vectorActions == null )
528
539
{
529
540
if ( param . vectorActionSpaceType == SpaceType . continuous )
@@ -598,9 +609,9 @@ void SendInfoToBrain()
598
609
brain . brainParameters . vectorObservationSize ,
599
610
info . vectorObservation . Count ) ) ;
600
611
}
601
-
612
+
602
613
Utilities . ShiftLeft ( info . stackedVectorObservation , param . vectorObservationSize ) ;
603
- Utilities . ReplaceRange ( info . stackedVectorObservation , info . vectorObservation ,
614
+ Utilities . ReplaceRange ( info . stackedVectorObservation , info . vectorObservation ,
604
615
info . stackedVectorObservation . Count - info . vectorObservation . Count ) ;
605
616
606
617
info . visualObservations . Clear ( ) ;
@@ -624,7 +635,7 @@ void SendInfoToBrain()
624
635
param . cameraResolutions [ i ] . height ) ;
625
636
info . visualObservations . Add ( obsTexture ) ;
626
637
}
627
-
638
+
628
639
//Then add all renderTextures
629
640
var camCount = agentParameters . agentCameras . Count ;
630
641
for ( int i = 0 ; i < agentParameters . agentRenderTextures . Count ; i ++ )
@@ -653,13 +664,13 @@ void SendInfoToBrain()
653
664
654
665
/// <summary>
655
666
/// Collects the (vector, visual, text) observations of the agent.
656
- /// The agent observation describes the current environment from the
667
+ /// The agent observation describes the current environment from the
657
668
/// perspective of the agent.
658
669
/// </summary>
659
670
/// <remarks>
660
671
/// Simply, an agent's observation is any environment information that helps
661
672
/// the Agent achieve its goal. For example, for a fighting Agent, its
662
- /// observation could include distances to friends or enemies, or the
673
+ /// observation could include distances to friends or enemies, or the
663
674
/// current level of ammunition at its disposal.
664
675
/// Recall that an Agent may attach vector, visual or textual observations.
665
676
/// Vector observations are added by calling the provided helper methods:
@@ -678,7 +689,7 @@ void SendInfoToBrain()
678
689
/// needs to match the vectorObservationSize attribute of the linked Brain.
679
690
/// Visual observations are implicitly added from the cameras attached to
680
691
/// the Agent.
681
- /// Lastly, textual observations are added using
692
+ /// Lastly, textual observations are added using
682
693
/// <see cref="SetTextObs(string)"/>.
683
694
/// </remarks>
684
695
public virtual void CollectObservations ( )
@@ -861,7 +872,7 @@ public virtual void AgentAction(float[] vectorAction, string textAction, Communi
861
872
}
862
873
863
874
/// <summary>
864
- /// Specifies the agent behavior when done and
875
+ /// Specifies the agent behavior when done and
865
876
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
866
877
/// used to remove the agent from the scene.
867
878
/// </summary>
@@ -906,12 +917,12 @@ public void UpdateMemoriesAction(List<float> memories)
906
917
{
907
918
action . memories = memories ;
908
919
}
909
-
920
+
910
921
public void AppendMemoriesAction ( List < float > memories )
911
922
{
912
923
action . memories . AddRange ( memories ) ;
913
924
}
914
-
925
+
915
926
public List < float > GetMemoriesAction ( )
916
927
{
917
928
return action . memories ;
@@ -966,9 +977,9 @@ protected float ScaleAction(float rawAction, float min, float max)
966
977
/// <summary>
967
978
/// Sets the status of the agent.
968
979
/// </summary>
969
- /// <param name="academyMaxStep">If set to <c>true</c>
980
+ /// <param name="academyMaxStep">If set to <c>true</c>
970
981
/// The agent must set maxStepReached.</param>
971
- /// <param name="academyDone">If set to <c>true</c>
982
+ /// <param name="academyDone">If set to <c>true</c>
972
983
/// The agent must set done.</param>
973
984
/// <param name="academyStepCounter">Number of current steps in episode</param>
974
985
void SetStatus ( bool academyMaxStep , bool academyDone , int academyStepCounter )
@@ -984,7 +995,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
984
995
maxStepReached = true ;
985
996
}
986
997
987
- // If the Academy needs to reset, the agent should reset
998
+ // If the Academy needs to reset, the agent should reset
988
999
// even if it was reset recently.
989
1000
if ( academyDone )
990
1001
{
@@ -996,7 +1007,7 @@ void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
996
1007
/// Signals the agent that it must reset if its done flag is set to true.
997
1008
void ResetIfDone ( )
998
1009
{
999
- // If an agent is done, then it will also
1010
+ // If an agent is done, then it will also
1000
1011
// request for a decision and an action
1001
1012
if ( IsDone ( ) )
1002
1013
{
@@ -1126,14 +1137,14 @@ public static Texture2D ObservationToTexture(Camera obsCamera, int width, int he
1126
1137
obsCamera . Render ( ) ;
1127
1138
1128
1139
texture2D . ReadPixels ( new Rect ( 0 , 0 , texture2D . width , texture2D . height ) , 0 , 0 ) ;
1129
-
1140
+
1130
1141
obsCamera . targetTexture = prevCameraRT ;
1131
1142
obsCamera . rect = oldRec ;
1132
1143
RenderTexture . active = prevActiveRT ;
1133
1144
RenderTexture . ReleaseTemporary ( tempRT ) ;
1134
1145
return texture2D ;
1135
1146
}
1136
-
1147
+
1137
1148
/// <summary>
1138
1149
/// Converts a RenderTexture and corresponding resolution to a 2D texture.
1139
1150
/// </summary>
@@ -1150,7 +1161,7 @@ public static Texture2D ObservationToTexture(RenderTexture obsTexture, int width
1150
1161
{
1151
1162
texture2D . Resize ( width , height ) ;
1152
1163
}
1153
-
1164
+
1154
1165
if ( width != obsTexture . width || height != obsTexture . height )
1155
1166
{
1156
1167
throw new UnityAgentsException ( string . Format (
@@ -1175,5 +1186,5 @@ public void SetCustomObservation(CustomObservation customObservation)
1175
1186
{
1176
1187
info . customObservation = customObservation ;
1177
1188
}
1178
- }
1189
+ }
1179
1190
}
0 commit comments