
Commit 3dfd852

Hotfix 0.3.0c (#618)
Fixes the following issues:
* Missing component reference in BananaRL environment.
* Neural Network for multiple visual observations was not properly generated.
* Episode time-out value estimate bootstrapping used incorrect observation as input.
1 parent 0be36ec commit 3dfd852

File tree

9 files changed: +67 −41 lines

* docs/Getting-Started-with-Balance-Ball.md
* docs/Learning-Environment-Examples.md
* python/trainer_config.yaml
* python/unitytrainers/bc/trainer.py
* python/unitytrainers/models.py
* python/unitytrainers/ppo/trainer.py
* python/unitytrainers/trainer.py
* python/unitytrainers/trainer_controller.py
* unity-environment/Assets/ML-Agents/Examples/BananaCollectors/BananaRL.unity

docs/Getting-Started-with-Balance-Ball.md

Lines changed: 1 addition & 1 deletion
@@ -269,7 +269,7 @@ on the same graph.
 
 To summarize, go to your command line, enter the `ml-agents` directory and type:
 
-```python
+```
 python3 python/learn.py <env_file_path> --run-id=<run-identifier> --train
 ```
 **Note**: If you're using Anaconda, don't forget to activate the ml-agents environment first.

docs/Learning-Environment-Examples.md

Lines changed: 5 additions & 5 deletions
@@ -154,15 +154,15 @@ If you would like to contribute environments, please see our
 ![Banana](images/banana.png)
 
 * Set-up: A multi-agent environment where agents compete to collect bananas.
-* Goal: The agents must learn to move to as many yellow bananas as possible while avoiding red bananas.
-* Agents: The environment contains 10 agents linked to a single brain.
+* Goal: The agents must learn to move to as many yellow bananas as possible while avoiding blue bananas.
+* Agents: The environment contains 5 agents linked to a single brain.
 * Agent Reward Function (independent):
   * +1 for interaction with yellow banana
-  * -1 for interaction with red banana.
+  * -1 for interaction with blue banana.
 * Brains: One brain with the following observation/action space.
-  * Vector Observation space: (Continuous) 51 corresponding to velocity of agent, plus ray-based perception of objects around agent's forward direction.
+  * Vector Observation space: (Continuous) 53 corresponding to velocity of agent (2), whether agent is frozen and/or shot its laser (2), plus ray-based perception of objects around agent's forward direction (49; 7 raycast angles with 7 measurements for each).
   * Vector Action space: (Continuous) Size of 3, corresponding to forward movement, y-axis rotation, and whether to use laser to disable other agents.
-  * Visual Observations (Optional): First-person view for each agent.
+  * Visual Observations (Optional; None by default): First-person view for each agent.
 * Reset Parameters: None
 
 ## Hallway
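For reference, the updated 53-value vector observation above decomposes as 2 velocity values, 2 status flags (frozen, laser fired), and 7 ray angles with 7 measurements per ray. Below is a minimal sketch of that layout in Python; the names and the per-ray encoding are illustrative assumptions, not the actual C# agent code.

```python
import numpy as np

NUM_RAY_ANGLES = 7        # raycast directions around the agent's forward axis
MEASUREMENTS_PER_RAY = 7  # assumed per-ray encoding (e.g. hit-type one-hots plus distance)

def banana_vector_observation(velocity_xz, frozen, shot_laser, ray_perception):
    """Illustrative layout of the 53-value observation: 2 + 2 + 7 * 7 = 53."""
    obs = np.concatenate([
        np.asarray(velocity_xz, dtype=np.float32),             # 2 velocity values
        np.asarray([float(frozen), float(shot_laser)]),        # 2 status flags
        np.asarray(ray_perception, dtype=np.float32).ravel(),  # 49 ray-perception values
    ])
    assert obs.shape == (53,)
    return obs

# Dummy example
obs = banana_vector_observation(
    velocity_xz=[0.5, -0.1],
    frozen=False,
    shot_laser=True,
    ray_perception=np.zeros((NUM_RAY_ANGLES, MEASUREMENTS_PER_RAY)),
)
print(obs.shape)  # (53,)
```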

python/trainer_config.yaml

Lines changed: 6 additions & 0 deletions
@@ -80,6 +80,12 @@ GoalieBrain:
 
 Ball3DBrain:
     normalize: true
+    batch_size: 1200
+    buffer_size: 12000
+    summary_freq: 1000
+    time_horizon: 1000
+    gamma: 0.995
+    beta: 0.001
 
 BouncerBrain:
     normalize: true
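The new Ball3DBrain keys above override the trainer defaults for that brain only. A rough sketch of how such per-brain overrides can be merged over a shared default section when the YAML is loaded; `load_config` and the `default` key are illustrative assumptions, not the exact ml-agents loader.

```python
import yaml

def load_config(path, brain_name):
    """Merge brain-specific hyperparameters over a default section (illustrative)."""
    with open(path) as f:
        config = yaml.safe_load(f)
    params = dict(config.get("default", {}))   # start from shared defaults
    params.update(config.get(brain_name, {}))  # brain-specific keys take precedence
    return params

# Example: Ball3DBrain would pick up batch_size=1200, buffer_size=12000, gamma=0.995, etc.
# print(load_config("python/trainer_config.yaml", "Ball3DBrain"))
```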

python/unitytrainers/bc/trainer.py

Lines changed: 5 additions & 4 deletions
@@ -229,13 +229,14 @@ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take
                 self.episode_steps[agent_id] = 0
             self.episode_steps[agent_id] += 1
 
-    def process_experiences(self, info: AllBrainInfo):
+    def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
         """
         Checks agent histories for processing condition, and processes them as necessary.
         Processing involves calculating value and advantage targets for model updating step.
-        :param info: Current AllBrainInfo
+        :param current_info: Current AllBrainInfo
+        :param next_info: Next AllBrainInfo
         """
-        info_teacher = info[self.brain_to_imitate]
+        info_teacher = next_info[self.brain_to_imitate]
         for l in range(len(info_teacher.agents)):
             if ((info_teacher.local_done[l] or
                  len(self.training_buffer[info_teacher.agents[l]]['actions']) > self.trainer_parameters[
@@ -246,7 +247,7 @@ def process_experiences(self, info: AllBrainInfo):
                                                        training_length=self.sequence_length)
                 self.training_buffer[agent_id].reset_agent()
 
-        info_student = info[self.brain_name]
+        info_student = next_info[self.brain_name]
         for l in range(len(info_student.agents)):
             if info_student.local_done[l]:
                 agent_id = info_student.agents[l]

python/unitytrainers/models.py

Lines changed: 4 additions & 3 deletions
@@ -80,15 +80,16 @@ def create_continuous_state_encoder(self, h_size, activation, num_layers):
                                      kernel_initializer=c_layers.variance_scaling_initializer(1.0))
         return hidden
 
-    def create_visual_encoder(self, h_size, activation, num_layers):
+    def create_visual_encoder(self, image_input, h_size, activation, num_layers):
         """
         Builds a set of visual (CNN) encoders.
+        :param image_input: The placeholder for the image input to use.
         :param h_size: Hidden layer size.
         :param activation: What type of activation function to use for layers.
         :param num_layers: number of hidden layers to create.
         :return: List of hidden layer tensors.
         """
-        conv1 = tf.layers.conv2d(self.visual_in[-1], 16, kernel_size=[8, 8], strides=[4, 4],
+        conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8], strides=[4, 4],
                                  activation=tf.nn.elu)
         conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4], strides=[2, 2],
                                  activation=tf.nn.elu)
@@ -136,7 +137,7 @@ def create_new_obs(self, num_streams, h_size, num_layers):
             hidden_state, hidden_visual = None, None
             if brain.number_visual_observations > 0:
                 for j in range(brain.number_visual_observations):
-                    encoded_visual = self.create_visual_encoder(h_size, activation_fn, num_layers)
+                    encoded_visual = self.create_visual_encoder(self.visual_in[j], h_size, activation_fn, num_layers)
                     visual_encoders.append(encoded_visual)
                 hidden_visual = tf.concat(visual_encoders, axis=1)
             if brain.vector_observation_space_size > 0:
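The change above fixes the second issue in the commit message: `create_visual_encoder` previously always read `self.visual_in[-1]`, so with multiple cameras every encoder stream was built from the same (last) visual observation. A small self-contained sketch of the before/after behaviour; `encode()` and the observation list are stand-ins for the TensorFlow graph construction.

```python
def encode(image):
    """Stand-in for the CNN encoder; records which input it was built from."""
    return f"encoded({image})"

visual_in = ["cam_0", "cam_1", "cam_2"]  # placeholders for three visual observations

# Before the fix: every iteration encoded the last placeholder.
buggy = [encode(visual_in[-1]) for _ in visual_in]
# -> ['encoded(cam_2)', 'encoded(cam_2)', 'encoded(cam_2)']

# After the fix: each visual observation feeds its own encoder.
fixed = [encode(visual_in[j]) for j in range(len(visual_in))]
# -> ['encoded(cam_0)', 'encoded(cam_1)', 'encoded(cam_2)']

print(buggy)
print(fixed)
```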

python/unitytrainers/ppo/trainer.py

Lines changed: 17 additions & 12 deletions
@@ -260,34 +260,39 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn
                 self.episode_steps[agent_id] = 0
             self.episode_steps[agent_id] += 1
 
-
-    def process_experiences(self, all_info: AllBrainInfo):
+    def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo):
         """
         Checks agent histories for processing condition, and processes them as necessary.
         Processing involves calculating value and advantage targets for model updating step.
-        :param all_info: Dictionary of all current brains and corresponding BrainInfo.
+        :param current_info: Dictionary of all current brains and corresponding BrainInfo.
+        :param new_info: Dictionary of all next brains and corresponding BrainInfo.
         """
 
-        info = all_info[self.brain_name]
+        info = new_info[self.brain_name]
+        last_info = current_info[self.brain_name]
         for l in range(len(info.agents)):
             agent_actions = self.training_buffer[info.agents[l]]['actions']
             if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])
                     and len(agent_actions) > 0):
                 if info.local_done[l] and not info.max_reached[l]:
                     value_next = 0.0
                 else:
-                    feed_dict = {self.model.batch_size: len(info.vector_observations), self.model.sequence_length: 1}
+                    if info.max_reached[l]:
+                        bootstrapping_info = last_info
+                    else:
+                        bootstrapping_info = info
+                    feed_dict = {self.model.batch_size: len(bootstrapping_info.vector_observations), self.model.sequence_length: 1}
                     if self.use_observations:
-                        for i in range(len(info.visual_observations)):
-                            feed_dict[self.model.visual_in[i]] = info.visual_observations[i]
+                        for i in range(len(bootstrapping_info.visual_observations)):
+                            feed_dict[self.model.visual_in[i]] = bootstrapping_info.visual_observations[i]
                     if self.use_states:
-                        feed_dict[self.model.vector_in] = info.vector_observations
+                        feed_dict[self.model.vector_in] = bootstrapping_info.vector_observations
                     if self.use_recurrent:
-                        if info.memories.shape[1] == 0:
-                            info.memories = np.zeros((len(info.vector_observations), self.m_size))
-                        feed_dict[self.model.memory_in] = info.memories
+                        if bootstrapping_info.memories.shape[1] == 0:
+                            bootstrapping_info.memories = np.zeros((len(bootstrapping_info.vector_observations), self.m_size))
+                        feed_dict[self.model.memory_in] = bootstrapping_info.memories
                     if not self.is_continuous_action and self.use_recurrent:
-                        feed_dict[self.model.prev_action] = np.reshape(info.previous_vector_actions, [-1])
+                        feed_dict[self.model.prev_action] = np.reshape(bootstrapping_info.previous_vector_actions, [-1])
                     value_next = self.sess.run(self.model.value, feed_dict)[l]
                 agent_id = info.agents[l]
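This is the fix for the episode time-out bootstrapping issue: when an episode ends because the agent hit its step limit (`max_reached`), the next observation already comes from the reset environment, so the value used to bootstrap the return must be estimated from the last pre-reset observation instead. A condensed sketch of that decision follows; `estimate_value` and the arguments are simplified stand-ins for the model call in the trainer.

```python
def bootstrap_value(done, max_reached, last_obs, next_obs, estimate_value):
    """Pick the observation used to bootstrap the return when an episode is cut off."""
    if done and not max_reached:
        return 0.0  # true terminal state: nothing left to bootstrap
    # Horizon cut or time-out: the task continues conceptually, but when
    # max_reached the next observation is post-reset, so use the last valid one.
    bootstrapping_obs = last_obs if max_reached else next_obs
    return estimate_value(bootstrapping_obs)

# Dummy example with a stand-in value function
value = bootstrap_value(
    done=True, max_reached=True,
    last_obs=[0.2, 0.4], next_obs=[0.0, 0.0],
    estimate_value=lambda obs: sum(obs),  # stand-in for self.sess.run(self.model.value, ...)
)
print(value)  # ~0.6, bootstrapped from the pre-reset observation rather than the reset one
```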

python/unitytrainers/trainer.py

Lines changed: 3 additions & 2 deletions
@@ -103,11 +103,12 @@ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take
         """
         raise UnityTrainerException("The add_experiences method was not implemented.")
 
-    def process_experiences(self, info: AllBrainInfo):
+    def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
         """
         Checks agent histories for processing condition, and processes them as necessary.
         Processing involves calculating value and advantage targets for model updating step.
-        :param info: Dictionary of all current brains and corresponding BrainInfo.
+        :param current_info: Dictionary of all current-step brains and corresponding BrainInfo.
+        :param next_info: Dictionary of all next-step brains and corresponding BrainInfo.
         """
         raise UnityTrainerException("The process_experiences method was not implemented.")

python/unitytrainers/trainer_controller.py

Lines changed: 3 additions & 5 deletions
@@ -253,13 +253,11 @@ def start_learning(self):
 
                 for brain_name, trainer in self.trainers.items():
                     trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
-                curr_info = new_info
-                for brain_name, trainer in self.trainers.items():
-                    trainer.process_experiences(curr_info)
+                    trainer.process_experiences(curr_info, new_info)
                     if trainer.is_ready_update() and self.train_model and trainer.get_step <= trainer.get_max_steps:
                         # Perform gradient descent with experience buffer
                         trainer.update_model()
-                    # Write training statistics to tensorboard.
+                    # Write training statistics to Tensorboard.
                     trainer.write_summary(self.env.curriculum.lesson_number)
                     if self.train_model and trainer.get_step <= trainer.get_max_steps:
                         trainer.increment_step()
@@ -269,7 +267,7 @@ def start_learning(self):
                 if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
                     # Save Tensorflow model
                     self._save_model(sess, steps=global_step, saver=saver)
-
+                curr_info = new_info
             # Final save Tensorflow model
             if global_step != 0 and self.train_model:
                 self._save_model(sess, steps=global_step, saver=saver)
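The controller now passes both the pre-step and post-step infos to `process_experiences` and only advances `curr_info` at the end of the iteration, so the pre-reset observations remain available for the bootstrapping case above. A stripped-down sketch of the corrected loop ordering; `env`, `trainers`, and the info objects are simplified placeholders, not the real controller.

```python
def training_loop(env, trainers, max_steps):
    """Illustrative ordering: process experiences before advancing curr_info."""
    curr_info = env.reset()
    for _ in range(max_steps):
        actions = {name: t.take_action(curr_info) for name, t in trainers.items()}
        new_info = env.step(actions)
        for name, trainer in trainers.items():
            trainer.add_experiences(curr_info, new_info, actions[name])
            # curr_info still holds the pre-step observations here, so a trainer
            # can bootstrap from them when an agent hit its episode time limit.
            trainer.process_experiences(curr_info, new_info)
            if trainer.is_ready_update():
                trainer.update_model()
        curr_info = new_info  # advance only after every trainer has processed the step
```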

unity-environment/Assets/ML-Agents/Examples/BananaCollectors/BananaRL.unity

Lines changed: 23 additions & 9 deletions
@@ -13,7 +13,7 @@ OcclusionCullingSettings:
 --- !u!104 &2
 RenderSettings:
   m_ObjectHideFlags: 0
-  serializedVersion: 9
+  serializedVersion: 8
   m_Fog: 0
   m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1}
   m_FogMode: 3
@@ -39,12 +39,11 @@ RenderSettings:
   m_CustomReflection: {fileID: 0}
   m_Sun: {fileID: 0}
   m_IndirectSpecularColor: {r: 0, g: 0, b: 0, a: 1}
-  m_UseRadianceAmbientProbe: 0
 --- !u!157 &3
 LightmapSettings:
   m_ObjectHideFlags: 0
   serializedVersion: 11
-  m_GIWorkflowMode: 0
+  m_GIWorkflowMode: 1
   m_GISettings:
     serializedVersion: 2
     m_BounceScale: 1
@@ -55,10 +54,11 @@ LightmapSettings:
   m_EnableBakedLightmaps: 1
   m_EnableRealtimeLightmaps: 1
   m_LightmapEditorSettings:
-    serializedVersion: 10
+    serializedVersion: 9
     m_Resolution: 2
     m_BakeResolution: 40
-    m_AtlasSize: 1024
+    m_TextureWidth: 1024
+    m_TextureHeight: 1024
     m_AO: 1
     m_AOMaxDistance: 1
     m_CompAOExponent: 1
@@ -678,8 +678,13 @@ Prefab:
       objectReference: {fileID: 0}
     - target: {fileID: 1819751139121548, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
       propertyPath: m_IsActive
-      value: 0
+      value: 1
       objectReference: {fileID: 0}
+    - target: {fileID: 114508049814297234, guid: 38400a68c4ea54b52998e34ee238d1a7,
+        type: 2}
+      propertyPath: myAcademyObj
+      value:
+      objectReference: {fileID: 1574236047}
     m_RemovedComponents: []
   m_ParentPrefab: {fileID: 100100000, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
   m_IsPrefabParent: 0
@@ -776,8 +781,13 @@ Prefab:
       objectReference: {fileID: 0}
     - target: {fileID: 1819751139121548, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
      propertyPath: m_IsActive
-      value: 0
+      value: 1
       objectReference: {fileID: 0}
+    - target: {fileID: 114508049814297234, guid: 38400a68c4ea54b52998e34ee238d1a7,
+        type: 2}
+      propertyPath: myAcademyObj
+      value:
+      objectReference: {fileID: 1574236047}
     m_RemovedComponents: []
   m_ParentPrefab: {fileID: 100100000, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
   m_IsPrefabParent: 0
@@ -841,7 +851,6 @@ Camera:
   m_TargetEye: 3
   m_HDR: 1
   m_AllowMSAA: 1
-  m_AllowDynamicResolution: 0
   m_ForceIntoRT: 1
   m_OcclusionCulling: 1
   m_StereoConvergence: 10
@@ -1204,8 +1213,13 @@ Prefab:
       objectReference: {fileID: 0}
     - target: {fileID: 1819751139121548, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
       propertyPath: m_IsActive
-      value: 0
+      value: 1
       objectReference: {fileID: 0}
+    - target: {fileID: 114508049814297234, guid: 38400a68c4ea54b52998e34ee238d1a7,
+        type: 2}
+      propertyPath: myAcademyObj
+      value:
+      objectReference: {fileID: 1574236047}
     m_RemovedComponents: []
   m_ParentPrefab: {fileID: 100100000, guid: 38400a68c4ea54b52998e34ee238d1a7, type: 2}
   m_IsPrefabParent: 0
