Fix off-by-one error on AgentReset and maxSteps (#3394)

vincentpierre · chriselion · web-flow · commit 48b2decb4742 · 2020-02-10T14:27:21.000-08:00
* Fix balance ball 100 reward

* Re-test

* Add test for maxSteps and number of AgentActions

Co-authored-by: Chris Elion <celion@gmail.com>
diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
@@ -753,7 +753,7 @@ void SendInfo()
         /// Used by the brain to make the agent perform a step.
         void AgentStep()
         {
-            if ((m_StepCount >= maxStep - 1) && (maxStep > 0))
+            if ((m_StepCount >= maxStep) && (maxStep > 0))
             {
                 NotifyAgentDone(true);
                 _AgentReset();
@@ -762,6 +762,7 @@ void AgentStep()
             {
                 m_StepCount += 1;
             }
+
             if ((m_RequestAction) && (m_Brain != null))
             {
                 m_RequestAction = false;
diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -37,7 +37,9 @@ internal void SetPolicy(IPolicy policy)
 
         public int initializeAgentCalls;
         public int collectObservationsCalls;
+        public int collectObservationsCallsSinceLastReset;
         public int agentActionCalls;
+        public int agentActionCallsSinceLastReset;
         public int agentResetCalls;
         public override void InitializeAgent()
         {
@@ -54,18 +56,22 @@ public override void InitializeAgent()
         public override void CollectObservations()
         {
             collectObservationsCalls += 1;
+            collectObservationsCallsSinceLastReset += 1;
             AddVectorObs(0f);
         }
 
         public override void AgentAction(float[] vectorAction)
         {
             agentActionCalls += 1;
+            agentActionCallsSinceLastReset += 1;
             AddReward(0.1f);
         }
 
         public override void AgentReset()
         {
             agentResetCalls += 1;
+            collectObservationsCallsSinceLastReset = 0;
+            agentActionCallsSinceLastReset = 0;
         }
 
         public override float[] Heuristic()
@@ -484,7 +490,7 @@ public void TestCumulativeReward()
             var j = 0;
             for (var i = 0; i < 500; i++)
             {
-                if (i % 20 == 0)
+                if (i % 21 == 0)
                 {
                     j = 0;
                 }
@@ -500,5 +506,40 @@ public void TestCumulativeReward()
                 aca.EnvironmentStep();
             }
         }
+
+        [Test]
+        public void TestMaxStepsReset()
+        {
+            var agentGo1 = new GameObject("TestAgent");
+            agentGo1.AddComponent<TestAgent>();
+            var agent1 = agentGo1.GetComponent<TestAgent>();
+            var aca = Academy.Instance;
+
+            var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
+            decisionRequester.DecisionPeriod = 1;
+            decisionRequester.Awake();
+
+            var maxStep = 6;
+            agent1.maxStep = maxStep;
+            agent1.LazyInitialize();
+
+            for (var i = 0; i < 15; i++)
+            {
+                // We expect resets to occur when there are maxSteps actions since the last reset (and on the first step)
+                var expectReset = agent1.agentActionCallsSinceLastReset == maxStep || (i == 0);
+                var previousNumResets = agent1.agentResetCalls;
+
+                aca.EnvironmentStep();
+
+                if (expectReset)
+                {
+                    Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls);
+                }
+                else
+                {
+                    Assert.AreEqual(previousNumResets, agent1.agentResetCalls);
+                }
+            }
+        }
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -753,7 +753,7 @@ void SendInfo()`
`753`	`753`	`/// Used by the brain to make the agent perform a step.`
`754`	`754`	`void AgentStep()`
`755`	`755`	`{`
`756`		`- if ((m_StepCount >= maxStep - 1) && (maxStep > 0))`
	`756`	`+ if ((m_StepCount >= maxStep) && (maxStep > 0))`
`757`	`757`	`{`
`758`	`758`	`NotifyAgentDone(true);`
`759`	`759`	`_AgentReset();`
`@@ -762,6 +762,7 @@ void AgentStep()`
`762`	`762`	`{`
`763`	`763`	`m_StepCount += 1;`
`764`	`764`	`}`
	`765`	`+`
`765`	`766`	`if ((m_RequestAction) && (m_Brain != null))`
`766`	`767`	`{`
`767`	`768`	`m_RequestAction = false;`