
Commit bcc2d1c

Merge pull request #2346 from Unity-Technologies/release-0.9.0
Merge latest fixes from release into develop
2 parents fdc4ba5 + 7a2a922

15 files changed: +92 additions, −56 deletions

README.md

Lines changed: 3 additions & 3 deletions

```diff
@@ -27,7 +27,7 @@ developer communities.
 * 10+ sample Unity environments
 * Support for multiple environment configurations and training scenarios
 * Train memory-enhanced agents using deep reinforcement learning
-* Easily definable Curriculum Learning scenarios
+* Easily definable Curriculum Learning and Generalization scenarios
 * Broadcasting of agent behavior for supervised learning
 * Built-in support for Imitation Learning
 * Flexible agent control with On Demand Decision Making
@@ -77,11 +77,11 @@ If you run into any problems using the ML-Agents toolkit,
 [submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and
 make sure to include as much detail as possible.

-Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
+Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).


 For any other questions or feedback, connect directly with the ML-Agents
-
+

 ## Translations

```

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs

Lines changed: 6 additions & 3 deletions

```diff
@@ -36,8 +36,10 @@ private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
         public void Contruction()
         {
             var bp = new BrainParameters();
-            var tensorGenerator = new TensorApplier(bp, 0, new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var tensorGenerator = new TensorApplier(bp, 0, alloc);
             Assert.IsNotNull(tensorGenerator);
+            alloc.Dispose();
         }

         [Test]
@@ -76,8 +78,8 @@ public void ApplyDiscreteActionOutput()
                     4f, 5f, 6f, 7f, 8f})
             };
             var agentInfos = GetFakeAgentInfos();
-
-            var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, alloc);
             applier.Apply(inputTensor, agentInfos);
             var agents = agentInfos.Keys.ToList();
             var agent = agents[0] as TestAgent;
@@ -88,6 +90,7 @@ public void ApplyDiscreteActionOutput()
             action = agent.GetAction();
             Assert.AreEqual(action.vectorActions[0], 1);
             Assert.AreEqual(action.vectorActions[1], 2);
+            alloc.Dispose();
         }

         [Test]
```

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs

Lines changed: 21 additions & 11 deletions

```diff
@@ -44,30 +44,36 @@ private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
         public void Contruction()
         {
             var bp = new BrainParameters();
-            var tensorGenerator = new TensorGenerator(bp, 0, new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var tensorGenerator = new TensorGenerator(bp, 0, alloc);
             Assert.IsNotNull(tensorGenerator);
+            alloc.Dispose();
         }

         [Test]
         public void GenerateBatchSize()
         {
             var inputTensor = new TensorProxy();
+            var alloc = new TensorCachingAllocator();
             var batchSize = 4;
-            var generator = new BatchSizeGenerator(new TensorCachingAllocator());
+            var generator = new BatchSizeGenerator(alloc);
             generator.Generate(inputTensor, batchSize, null);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0], batchSize);
+            alloc.Dispose();
         }

         [Test]
         public void GenerateSequenceLength()
         {
             var inputTensor = new TensorProxy();
+            var alloc = new TensorCachingAllocator();
             var batchSize = 4;
-            var generator = new SequenceLengthGenerator(new TensorCachingAllocator());
+            var generator = new SequenceLengthGenerator(alloc);
             generator.Generate(inputTensor, batchSize, null);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0], 1);
+            alloc.Dispose();
         }

         [Test]
@@ -79,14 +85,15 @@ public void GenerateVectorObservation()
             };
             var batchSize = 4;
             var agentInfos = GetFakeAgentInfos();
-
-            var generator = new VectorObservationGenerator(new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var generator = new VectorObservationGenerator(alloc);
             generator.Generate(inputTensor, batchSize, agentInfos);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0, 0], 1);
             Assert.AreEqual(inputTensor.Data[0, 2], 3);
             Assert.AreEqual(inputTensor.Data[1, 0], 4);
             Assert.AreEqual(inputTensor.Data[1, 2], 6);
+            alloc.Dispose();
         }

         [Test]
@@ -98,14 +105,15 @@ public void GenerateRecurrentInput()
             };
             var batchSize = 4;
             var agentInfos = GetFakeAgentInfos();
-
-            var generator = new RecurrentInputGenerator(new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var generator = new RecurrentInputGenerator(alloc);
             generator.Generate(inputTensor, batchSize, agentInfos);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0, 0], 0);
             Assert.AreEqual(inputTensor.Data[0, 4], 0);
             Assert.AreEqual(inputTensor.Data[1, 0], 1);
             Assert.AreEqual(inputTensor.Data[1, 4], 0);
+            alloc.Dispose();
         }

         [Test]
@@ -119,15 +127,16 @@ public void GeneratePreviousActionInput()
             };
             var batchSize = 4;
             var agentInfos = GetFakeAgentInfos();
-
-            var generator = new PreviousActionInputGenerator(new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var generator = new PreviousActionInputGenerator(alloc);

             generator.Generate(inputTensor, batchSize, agentInfos);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0, 0], 1);
             Assert.AreEqual(inputTensor.Data[0, 1], 2);
             Assert.AreEqual(inputTensor.Data[1, 0], 3);
             Assert.AreEqual(inputTensor.Data[1, 1], 4);
+            alloc.Dispose();
         }

         [Test]
@@ -141,14 +150,15 @@ public void GenerateActionMaskInput()
             };
             var batchSize = 4;
             var agentInfos = GetFakeAgentInfos();
-
-            var generator = new ActionMaskInputGenerator(new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var generator = new ActionMaskInputGenerator(alloc);
             generator.Generate(inputTensor, batchSize, agentInfos);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0, 0], 1);
             Assert.AreEqual(inputTensor.Data[0, 4], 1);
             Assert.AreEqual(inputTensor.Data[1, 0], 0);
             Assert.AreEqual(inputTensor.Data[1, 4], 1);
+            alloc.Dispose();
         }
     }
 }
```
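The common thread in the two tensor test files above: each `TensorCachingAllocator` is now held in a local variable and explicitly disposed at the end of the test, rather than constructed inline and never released, so whatever memory the allocator caches is freed deterministically between tests.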

UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs

Lines changed: 0 additions & 19 deletions

```diff
@@ -160,25 +160,6 @@ public void TestDstDataNull()
             Assert.Throws<ArgumentNullException>(() => m.Eval(src, dst));
         }

-        [Test]
-        public void TestDstWrongShape()
-        {
-            Multinomial m = new Multinomial(2018);
-
-            TensorProxy src = new TensorProxy
-            {
-                ValueType = TensorProxy.TensorType.FloatingPoint,
-                Data = new Tensor(0,1)
-            };
-            TensorProxy dst = new TensorProxy
-            {
-                ValueType = TensorProxy.TensorType.FloatingPoint,
-                Data = new Tensor(0,2)
-            };
-
-            Assert.Throws<ArgumentException>(() => m.Eval(src, dst));
-        }
-
         [Test]
         public void TestUnequalBatchSize()
         {
```

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs

Lines changed: 1 addition & 1 deletion

```diff
@@ -95,7 +95,7 @@ public abstract class Academy : MonoBehaviour
         [SerializeField]
         public BroadcastHub broadcastHub = new BroadcastHub();

-        private const string kApiVersion = "API-8";
+        private const string kApiVersion = "API-9";

         /// Temporary storage for global gravity value
         /// Used to restore original value when deriving Academy modifies it
```
File renamed without changes.

config/trainer_config.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -16,7 +16,7 @@ default:
     sequence_length: 64
     summary_freq: 1000
     use_recurrent: false
-    vis_encode_type: default
+    vis_encode_type: simple
     reward_signals:
         extrinsic:
             strength: 1.0
```

docs/ML-Agents-Overview.md

Lines changed: 10 additions & 1 deletion

```diff
@@ -320,7 +320,8 @@ actions from the human player to learn a policy. [Video
 Link](https://youtu.be/kpb8ZkMBFYs).

 ML-Agents provides ways to both learn directly from demonstrations as well as
-use demonstrations to help speed up reward-based training. The
+use demonstrations to help speed up reward-based training, and two algorithms to do
+so (Generative Adversarial Imitation Learning and Behavioral Cloning). The
 [Training with Imitation Learning](Training-Imitation-Learning.md) tutorial
 covers these features in more depth.

@@ -421,6 +422,14 @@ training process.
 the broadcasting feature
 [here](Learning-Environment-Design-Brains.md#using-the-broadcast-feature).

+- **Training with Environment Parameter Sampling** - To train agents to be robust
+  to changes in their environment (i.e., for generalization), an agent should be
+  exposed to a variety of environment variations. Similar to Curriculum Learning,
+  which allows environments to get more difficult as the agent learns, we also
+  provide a way to randomly resample aspects of the environment during training.
+  See [Training with Environment Parameter Sampling](Training-Generalization-Learning.md)
+  to learn more about this feature.
+
 - **Docker Set-up (Experimental)** - To facilitate setting up ML-Agents without
   installing Python or TensorFlow directly, we provide a
   [guide](Using-Docker.md) on how to create and run a Docker container.
```
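Concretely, "resampling aspects of the environment" means varying the reset parameters each time the environment resets. A minimal sketch of the effect from the Python side, assuming the `mlagents.envs` low-level API; the build name, parameter names, and ranges below are illustrative, not taken from this commit:

```python
import random

from mlagents.envs import UnityEnvironment

# Illustrative only: "3DBall" stands in for a real environment build, and
# the reset parameters/ranges stand in for whatever a sampler file defines.
env = UnityEnvironment(file_name="3DBall")

for episode in range(10):
    # Draw a fresh environment variation, as the Sampler Manager would.
    config = {
        "gravity": random.uniform(4.0, 12.0),
        "scale": random.uniform(0.5, 4.0),
    }
    env.reset(config=config, train_mode=True)
    # ... run the episode against the varied environment ...

env.close()
```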

docs/Training-Generalization-Learning.md

Lines changed: 42 additions & 9 deletions

````diff
@@ -18,8 +18,9 @@ Ball scale of 0.5 | Ball scale of 4
 _Variations of the 3D Ball environment._

 To vary environments, we first decide what parameters to vary in an
-environment. These parameters are known as `Reset Parameters`. In the 3D ball
-environment example displayed in the figure above, the reset parameters are `gravity`, `ball_mass` and `ball_scale`.
+environment. We call these parameters `Reset Parameters`. In the 3D ball
+environment example displayed in the figure above, the reset parameters are
+`gravity`, `ball_mass` and `ball_scale`.


 ## How-to
@@ -31,17 +32,17 @@ can be done either deterministically or randomly.
 This is done by assigning each reset parameter a sampler, which samples a reset
 parameter value (such as a uniform sampler). If a sampler isn't provided for a
 reset parameter, the parameter maintains the default value throughout the
-training, remaining unchanged. The samplers for all the reset parameters are
-handled by a **Sampler Manager**, which also handles the generation of new
+training procedure, remaining unchanged. The samplers for all the reset parameters
+are handled by a **Sampler Manager**, which also handles the generation of new
 values for the reset parameters when needed.

 To setup the Sampler Manager, we setup a YAML file that specifies how we wish to
 generate new samples. In this file, we specify the samplers and the
-`resampling-duration` (number of simulation steps after which reset parameters are
+`resampling-interval` (number of simulation steps after which reset parameters are
 resampled). Below is an example of a sampler file for the 3D ball environment.

 ```yaml
-episode-length: 5000
+resampling-interval: 5000

 mass:
     sampler-type: "uniform"
@@ -59,7 +60,7 @@ scale:

 ```

-* `resampling-duration` (int) - Specifies the number of steps for agent to
+* `resampling-interval` (int) - Specifies the number of steps for the agent to
   train under a particular environment configuration before resetting the
   environment with a new sample of reset parameters.

@@ -77,8 +78,40 @@ environment, then this specification will be ignored.
 key under the `multirange_uniform` sampler for the gravity reset parameter.
 The key name should match the name of the corresponding argument in the sampler definition. (Look at defining a new sampler method)

+
 The sampler manager allocates a sampler for a reset parameter by using the *Sampler Factory*, which maintains a dictionary mapping of string keys to sampler objects. The samplers available for reset parameter resampling are those registered in the Sampler Factory.

+#### Possible Sampler Types
+
+The currently implemented samplers that can be used with the `sampler-type` arguments are:
+
+* `uniform` - Uniform sampler
+    * Uniformly samples a single float value between defined endpoints.
+      The sub-arguments for this sampler to specify the interval
+      endpoints are as below. The sampling is done in the range of
+      [`min_value`, `max_value`).
+
+    * **sub-arguments** - `min_value`, `max_value`
+
+* `gaussian` - Gaussian sampler
+    * Samples a single float value from the distribution characterized by
+      the mean and standard deviation. The sub-arguments to specify the
+      Gaussian distribution to use are as below.
+
+    * **sub-arguments** - `mean`, `st_dev`
+
+* `multirange_uniform` - Multirange Uniform sampler
+    * Uniformly samples a single float value from the specified intervals.
+      Samples by first performing a weighted pick of an interval from the
+      list of intervals (weighted based on interval width) and then samples
+      uniformly from the selected interval (half-closed interval, same as
+      the uniform sampler). This sampler can take an arbitrary number of
+      intervals in a list in the following format:
+      [[`interval_1_min`, `interval_1_max`], [`interval_2_min`, `interval_2_max`], ...]
+
+    * **sub-arguments** - `intervals`
+
+
 The implementation of the samplers can be found at `ml-agents-envs/mlagents/envs/sampler_class.py`.

 ### Defining a new sampler method
````
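The weighted-interval behavior of `multirange_uniform` described above is the least obvious of the three samplers. A minimal sketch of the idea in plain Python, illustrative only and not the toolkit's actual `sampler_class.py` implementation:

```python
import random


def multirange_uniform(intervals):
    """Sample one float from a list of [min, max] intervals.

    An interval is first chosen with probability proportional to its
    width, then a value is drawn uniformly from that interval.
    """
    widths = [high - low for low, high in intervals]
    low, high = random.choices(intervals, weights=widths, k=1)[0]
    return random.uniform(low, high)


# For example, a value drawn from [7, 10) or [15, 20), with the wider
# interval selected proportionally more often:
print(multirange_uniform([[7.0, 10.0], [15.0, 20.0]]))
```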
````diff
@@ -115,10 +148,10 @@ With the sampler file setup, we can proceed to train our agent as explained in t

 ### Training with Generalization Learning

-We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specify our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/generalization-test.yaml` sampling setup, we can run
+We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specifying our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/3dball_generalize.yaml` sampling setup, we can run

 ```sh
-mlagents-learn config/trainer_config.yaml --sampler=config/generalize_test.yaml --run-id=3D-Ball-generalization --train
+mlagents-learn config/trainer_config.yaml --sampler=config/3dball_generalize.yaml --run-id=3D-Ball-generalization --train
 ```

 We can observe progress and metrics via TensorBoard.
````
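For completeness, a hedged sketch of what a custom sampler for the Sampler Factory might look like; the class shape, method name, and registration key are assumptions for illustration, not the toolkit's confirmed API (consult `ml-agents-envs/mlagents/envs/sampler_class.py` for the real interface):

```python
import random


class TriangularSampler:
    """Hypothetical custom sampler drawing from a triangular distribution.

    The real base class and factory-registration hook live in
    mlagents/envs/sampler_class.py and may differ from this sketch.
    """

    def __init__(self, low, high, mode):
        self.low = low
        self.high = high
        self.mode = mode

    def sample_parameter(self):
        # Produce one new value for the mapped reset parameter; the
        # Sampler Manager would call this every resampling-interval.
        return random.triangular(self.low, self.high, self.mode)
```

Once registered under a string key, such a sampler would be referenced from the sampler YAML just like the built-ins, e.g. `sampler-type: "triangular"` with `low`, `high` and `mode` as sub-arguments.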

docs/Training-ML-Agents.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -196,7 +196,7 @@ are conducting, see:
 * [Training with PPO](Training-PPO.md)
 * [Using Recurrent Neural Networks](Feature-Memory.md)
 * [Training with Curriculum Learning](Training-Curriculum-Learning.md)
-* [Training with Generalization](Training-Generalization-Learning.md)
+* [Training with Environment Parameter Sampling](Training-Generalization-Learning.md)
 * [Training with Imitation Learning](Training-Imitation-Learning.md)

 You can also compare the
```
