@@ -19,9 +19,9 @@ namespace Unity.MLAgents
    internal struct AgentInfo
    {
        /// <summary>
-        /// Keeps track of the last vector action taken by the Brain.
+        /// Keeps track of the last actions taken by the Brain.
        /// </summary>
-        public ActionBuffers storedVectorActions;
+        public ActionBuffers storedActions;

        /// <summary>
        /// For discrete control, specifies the actions that the agent cannot take.
@@ -52,17 +52,17 @@ internal struct AgentInfo

        public void ClearActions()
        {
-            storedVectorActions.Clear();
+            storedActions.Clear();
        }

        public void CopyActions(ActionBuffers actionBuffers)
        {
-            var continuousActions = storedVectorActions.ContinuousActions;
+            var continuousActions = storedActions.ContinuousActions;
            for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
            {
                continuousActions[i] = actionBuffers.ContinuousActions[i];
            }
-            var discreteActions = storedVectorActions.DiscreteActions;
+            var discreteActions = storedActions.DiscreteActions;
            for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
            {
                discreteActions[i] = actionBuffers.DiscreteActions[i];
@@ -438,7 +438,7 @@ public void LazyInitialize()
                InitializeSensors();
            }

-            m_Info.storedVectorActions = new ActionBuffers(
+            m_Info.storedActions = new ActionBuffers(
                new float[m_ActuatorManager.NumContinuousActions],
                new int[m_ActuatorManager.NumDiscreteActions]
            );
@@ -557,7 +557,7 @@ void NotifyAgentDone(DoneReason doneReason)
            m_CumulativeReward = 0f;
            m_RequestAction = false;
            m_RequestDecision = false;
-            m_Info.storedVectorActions.Clear();
+            m_Info.storedActions.Clear();
        }

        /// <summary>
@@ -886,12 +886,22 @@ public virtual void Initialize() { }
        /// <seealso cref="IActionReceiver.OnActionReceived"/>
        public virtual void Heuristic(in ActionBuffers actionsOut)
        {
+            var brainParams = m_PolicyFactory.BrainParameters;
+            var actionSpec = brainParams.ActionSpec;
+            // For continuous and discrete actions together, we don't need to fall back to the legacy method
+            if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+            {
+                Debug.LogWarning("Heuristic method called but not implemented. Clearing ActionBuffers.");
+                actionsOut.Clear();
+                return;
+            }
+
            // Disable deprecation warnings so we can call the legacy overload.
#pragma warning disable CS0618

            // The default implementation of Heuristic calls the
            // obsolete version for backward compatibility
-            switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
+            switch (brainParams.VectorActionSpaceType)
            {
                case SpaceType.Continuous:
                    Heuristic(actionsOut.ContinuousActions.Array);
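With this change, agents that define both continuous and discrete actions must override the `Heuristic(in ActionBuffers)` overload themselves, since the default now only warns and clears the buffers. A minimal sketch of such an override, assuming a hypothetical `ExampleAgent` and arbitrary input bindings (none of this is part of the commit):

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

// Hypothetical agent, shown only to illustrate the ActionBuffers-based Heuristic.
public class ExampleAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // Continuous slot 0: forward/backward control read from the vertical input axis.
        var continuousActions = actionsOut.ContinuousActions;
        continuousActions[0] = Input.GetAxis("Vertical");

        // Discrete branch 0: 1 = jump, 0 = do nothing.
        var discreteActions = actionsOut.DiscreteActions;
        discreteActions[0] = Input.GetKey(KeyCode.Space) ? 1 : 0;
    }
}
```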
@@ -1038,7 +1048,7 @@ void SendInfoToBrain()
                    CollectObservations(collectObservationsSensor);
                }
            }
-            using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
+            using (TimerStack.Instance.Scoped("WriteActionMask"))
            {
                m_ActuatorManager.WriteActionMask();
            }
@@ -1135,7 +1145,7 @@ public ReadOnlyCollection<float> GetObservations()
        }

        /// <summary>
-        /// Implement `CollectDiscreteActionMasks()` to collects the masks for discrete
+        /// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
        /// actions. When using discrete actions, the agent will not perform the masked
        /// action.
        /// </summary>
@@ -1144,7 +1154,7 @@ public ReadOnlyCollection<float> GetObservations()
        /// </param>
        /// <remarks>
        /// When using Discrete Control, you can prevent the Agent from using a certain
-        /// action by masking it with <see cref="DiscreteActionMasker.SetMask(int, IEnumerable{int})"/>.
+        /// action by masking it with <see cref="IDiscreteActionMask.WriteMask(int, IEnumerable{int})"/>.
        ///
        /// See [Agents - Actions] for more information on masking actions.
        ///
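The masking reference now points at `IDiscreteActionMask.WriteMask`. As a hedged illustration of how an agent might use it (the `GridAgent` class, the branch layout, and the `m_AgainstLeftWall` flag are assumptions for the example, not code from this commit):

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Hypothetical agent whose discrete branch 0 is: 0 = stay, 1 = move left, 2 = move right.
public class GridAgent : Agent
{
    bool m_AgainstLeftWall;  // assumed to be updated elsewhere by the agent's own logic

    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        if (m_AgainstLeftWall)
        {
            // Forbid "move left" (action index 1) on branch 0 for this decision.
            actionMask.WriteMask(0, new[] { 1 });
        }
    }
}
```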
@@ -1168,30 +1178,29 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
        /// on the provided action.
        /// </summary>
        /// <remarks>
-        /// An action is passed to this function in the form of an array vector. Your
-        /// implementation must use the array to direct the agent's behavior for the
+        /// An action is passed to this function in the form of an <seealso cref="ActionBuffers"/>.
+        /// Your implementation must use the array to direct the agent's behavior for the
        /// current step.
        ///
-        /// You decide how many elements you need in the action array to control your
+        /// You decide how many elements you need in the ActionBuffers to control your
        /// agent and what each element means. For example, if you want to apply a
        /// force to move an agent around the environment, you can arbitrarily pick
-        /// three values in the action array to use as the force components. During
-        /// training, the agent's policy learns to set those particular elements of
+        /// three values in the ActionBuffers.ContinuousActions array to use as the force components.
+        /// During training, the agent's policy learns to set those particular elements of
        /// the array to maximize the training rewards the agent receives. (Of course,
        /// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
        /// elements of the action array for the same purpose since there is no learning
        /// involved.)
        ///
-        /// Actions for an agent can be either *Continuous* or *Discrete*. Specify which
-        /// type of action space an agent uses, along with the size of the action array,
-        /// in the <see cref="BrainParameters"/> of the agent's associated
+        /// An Agent can use continuous and/or discrete actions. Configure this, along with the size
+        /// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
        /// <see cref="BehaviorParameters"/> component.
        ///
-        /// When an agent uses the continuous action space, the values in the action
+        /// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
        /// array are floating point numbers. You should clamp the values to the range,
        /// -1..1, to increase numerical stability during training.
        ///
-        /// When an agent uses the discrete action space, the values in the action array
+        /// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
        /// are integers that each represent a specific, discrete action. For example,
        /// you could define a set of discrete actions such as:
        ///
@@ -1204,24 +1213,23 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
        /// </code>
        ///
        /// When making a decision, the agent picks one of the five actions and puts the
-        /// corresponding integer value in the action vector. For example, if the agent
-        /// decided to move left, the action vector parameter would contain an array with
+        /// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
+        /// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
        /// a single element with the value 1.
        ///
        /// You can define multiple sets, or branches, of discrete actions to allow an
        /// agent to perform simultaneous, independent actions. For example, you could
        /// use one branch for movement and another branch for throwing a ball left, right,
        /// up, or down, to allow the agent to do both in the same step.
        ///
-        /// The action vector of a discrete action space contains one element for each
-        /// branch. The value of each element is the integer representing the chosen
-        /// action for that branch. The agent always chooses one action for each
-        /// branch.
+        /// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
+        /// element for each branch. The value of each element is the integer representing the
+        /// chosen action for that branch. The agent always chooses one action for each branch.
        ///
-        /// When you use the discrete action space, you can prevent the training process
+        /// When you use discrete actions, you can prevent the training process
        /// or the neural network model from choosing specific actions in a step by
-        /// implementing the <see cref="CollectDiscreteActionMasks(DiscreteActionMasker)"/>
-        /// function. For example, if your agent is next to a wall, you could mask out any
+        /// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
+        /// method. For example, if your agent is next to a wall, you could mask out any
        /// actions that would result in the agent trying to move into the wall.
        ///
        /// For more information about implementing agent actions see [Agents - Actions].
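To ground the rewritten remarks, here is a short sketch of an `OnActionReceived` override that reads both segments of the `ActionBuffers` (the `RollerAgent` class, its `Rigidbody` field, and the force values are illustrative assumptions, not part of this commit):

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

// Hypothetical agent with 2 continuous actions (XZ force) and 1 discrete branch (jump).
public class RollerAgent : Agent
{
    public Rigidbody body;        // assumed to be assigned in the Inspector
    public float moveForce = 10f;

    public override void OnActionReceived(ActionBuffers actions)
    {
        // Clamp continuous values to [-1, 1], as the remarks above recommend.
        var move = new Vector3(
            Mathf.Clamp(actions.ContinuousActions[0], -1f, 1f),
            0f,
            Mathf.Clamp(actions.ContinuousActions[1], -1f, 1f));
        body.AddForce(move * moveForce);

        // Discrete branch 0: value 1 means "jump", 0 means "do nothing".
        if (actions.DiscreteActions.Length > 0 && actions.DiscreteActions[0] == 1)
        {
            body.AddForce(Vector3.up * moveForce, ForceMode.Impulse);
        }
    }
}
```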
@@ -1233,6 +1241,14 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
        /// </param>
        public virtual void OnActionReceived(ActionBuffers actions)
        {
+            var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
+            // For continuous and discrete actions together, we don't need to fall back to the legacy method
+            if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+            {
+                // Nothing implemented.
+                return;
+            }
+
            if (!actions.ContinuousActions.IsEmpty())
            {
                m_LegacyActionCache = actions.ContinuousActions.Array;