
Commit bcc2d1c

Merge pull request #2346 from Unity-Technologies/release-0.9.0
Merge latest fixes from release into develop
2 parents fdc4ba5 + 7a2a922

15 files changed: +92 additions, −56 deletions

README.md

Lines changed: 3 additions & 3 deletions

```diff
@@ -27,7 +27,7 @@ developer communities.
 * 10+ sample Unity environments
 * Support for multiple environment configurations and training scenarios
 * Train memory-enhanced agents using deep reinforcement learning
-* Easily definable Curriculum Learning scenarios
+* Easily definable Curriculum Learning and Generalization scenarios
 * Broadcasting of agent behavior for supervised learning
 * Built-in support for Imitation Learning
 * Flexible agent control with On Demand Decision Making
@@ -77,11 +77,11 @@ If you run into any problems using the ML-Agents toolkit,
 [submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and
 make sure to include as much detail as possible.

-Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
+Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).


 For any other questions or feedback, connect directly with the ML-Agents
-
+

 ## Translations

```

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs

Lines changed: 6 additions & 3 deletions

```diff
@@ -36,8 +36,10 @@ private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
         public void Contruction()
         {
             var bp = new BrainParameters();
-            var tensorGenerator = new TensorApplier(bp, 0, new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var tensorGenerator = new TensorApplier(bp, 0, alloc);
             Assert.IsNotNull(tensorGenerator);
+            alloc.Dispose();
         }

         [Test]
@@ -76,8 +78,8 @@ public void ApplyDiscreteActionOutput()
                     4f, 5f, 6f, 7f, 8f})
             };
             var agentInfos = GetFakeAgentInfos();
-
-            var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var applier = new DiscreteActionOutputApplier(new int[]{2, 3}, 0, alloc);
             applier.Apply(inputTensor, agentInfos);
             var agents = agentInfos.Keys.ToList();
             var agent = agents[0] as TestAgent;
@@ -88,6 +90,7 @@ public void ApplyDiscreteActionOutput()
             action = agent.GetAction();
             Assert.AreEqual(action.vectorActions[0], 1);
             Assert.AreEqual(action.vectorActions[1], 2);
+            alloc.Dispose();
         }

         [Test]
```

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs

Lines changed: 21 additions & 11 deletions

```diff
@@ -44,30 +44,36 @@ private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
         public void Contruction()
         {
             var bp = new BrainParameters();
-            var tensorGenerator = new TensorGenerator(bp, 0, new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var tensorGenerator = new TensorGenerator(bp, 0, alloc);
             Assert.IsNotNull(tensorGenerator);
+            alloc.Dispose();
         }

         [Test]
         public void GenerateBatchSize()
         {
             var inputTensor = new TensorProxy();
+            var alloc = new TensorCachingAllocator();
             var batchSize = 4;
-            var generator = new BatchSizeGenerator(new TensorCachingAllocator());
+            var generator = new BatchSizeGenerator(alloc);
             generator.Generate(inputTensor, batchSize, null);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0], batchSize);
+            alloc.Dispose();
         }

         [Test]
         public void GenerateSequenceLength()
         {
             var inputTensor = new TensorProxy();
+            var alloc = new TensorCachingAllocator();
             var batchSize = 4;
-            var generator = new SequenceLengthGenerator(new TensorCachingAllocator());
+            var generator = new SequenceLengthGenerator(alloc);
             generator.Generate(inputTensor, batchSize, null);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0], 1);
+            alloc.Dispose();
         }

         [Test]
@@ -79,14 +85,15 @@ public void GenerateVectorObservation()
             };
             var batchSize = 4;
             var agentInfos = GetFakeAgentInfos();
-
-            var generator = new VectorObservationGenerator(new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var generator = new VectorObservationGenerator(alloc);
             generator.Generate(inputTensor, batchSize, agentInfos);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0, 0], 1);
             Assert.AreEqual(inputTensor.Data[0, 2], 3);
             Assert.AreEqual(inputTensor.Data[1, 0], 4);
             Assert.AreEqual(inputTensor.Data[1, 2], 6);
+            alloc.Dispose();
         }

         [Test]
@@ -98,14 +105,15 @@ public void GenerateRecurrentInput()
             };
             var batchSize = 4;
             var agentInfos = GetFakeAgentInfos();
-
-            var generator = new RecurrentInputGenerator(new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var generator = new RecurrentInputGenerator(alloc);
             generator.Generate(inputTensor, batchSize, agentInfos);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0, 0], 0);
             Assert.AreEqual(inputTensor.Data[0, 4], 0);
             Assert.AreEqual(inputTensor.Data[1, 0], 1);
             Assert.AreEqual(inputTensor.Data[1, 4], 0);
+            alloc.Dispose();
         }

         [Test]
@@ -119,15 +127,16 @@ public void GeneratePreviousActionInput()
             };
             var batchSize = 4;
             var agentInfos = GetFakeAgentInfos();
-
-            var generator = new PreviousActionInputGenerator(new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var generator = new PreviousActionInputGenerator(alloc);

             generator.Generate(inputTensor, batchSize, agentInfos);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0, 0], 1);
             Assert.AreEqual(inputTensor.Data[0, 1], 2);
             Assert.AreEqual(inputTensor.Data[1, 0], 3);
             Assert.AreEqual(inputTensor.Data[1, 1], 4);
+            alloc.Dispose();
         }

         [Test]
@@ -141,14 +150,15 @@ public void GenerateActionMaskInput()
             };
             var batchSize = 4;
             var agentInfos = GetFakeAgentInfos();
-
-            var generator = new ActionMaskInputGenerator(new TensorCachingAllocator());
+            var alloc = new TensorCachingAllocator();
+            var generator = new ActionMaskInputGenerator(alloc);
             generator.Generate(inputTensor, batchSize, agentInfos);
             Assert.IsNotNull(inputTensor.Data);
             Assert.AreEqual(inputTensor.Data[0, 0], 1);
             Assert.AreEqual(inputTensor.Data[0, 4], 1);
             Assert.AreEqual(inputTensor.Data[1, 0], 0);
             Assert.AreEqual(inputTensor.Data[1, 4], 1);
+            alloc.Dispose();
         }
     }
 }
```
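The common thread in the two tensor test files above: each `TensorCachingAllocator` is now held in a local variable and explicitly disposed at the end of the test, rather than constructed inline and never released, so whatever memory the allocator caches is freed deterministically between tests.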

UnitySDK/Assets/ML-Agents/Editor/Tests/MultinomialTest.cs

Lines changed: 0 additions & 19 deletions

```diff
@@ -160,25 +160,6 @@ public void TestDstDataNull()
             Assert.Throws<ArgumentNullException>(() => m.Eval(src, dst));
         }

-        [Test]
-        public void TestDstWrongShape()
-        {
-            Multinomial m = new Multinomial(2018);
-
-            TensorProxy src = new TensorProxy
-            {
-                ValueType = TensorProxy.TensorType.FloatingPoint,
-                Data = new Tensor(0,1)
-            };
-            TensorProxy dst = new TensorProxy
-            {
-                ValueType = TensorProxy.TensorType.FloatingPoint,
-                Data = new Tensor(0,2)
-            };
-
-            Assert.Throws<ArgumentException>(() => m.Eval(src, dst));
-        }
-
         [Test]
         public void TestUnequalBatchSize()
         {
```

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs

Lines changed: 1 addition & 1 deletion

```diff
@@ -95,7 +95,7 @@ public abstract class Academy : MonoBehaviour
         [SerializeField]
         public BroadcastHub broadcastHub = new BroadcastHub();

-        private const string kApiVersion = "API-8";
+        private const string kApiVersion = "API-9";

         /// Temporary storage for global gravity value
         /// Used to restore original value when deriving Academy modifies it
```
File renamed without changes.

config/trainer_config.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -16,7 +16,7 @@ default:
     sequence_length: 64
     summary_freq: 1000
     use_recurrent: false
-    vis_encode_type: default
+    vis_encode_type: simple
     reward_signals:
         extrinsic:
             strength: 1.0
```

docs/ML-Agents-Overview.md

Lines changed: 10 additions & 1 deletion

```diff
@@ -320,7 +320,8 @@ actions from the human player to learn a policy. [Video
 Link](https://youtu.be/kpb8ZkMBFYs).

 ML-Agents provides ways to both learn directly from demonstrations as well as
-use demonstrations to help speed up reward-based training. The
+use demonstrations to help speed up reward-based training, and two algorithms to do
+so (Generative Adversarial Imitation Learning and Behavioral Cloning). The
 [Training with Imitation Learning](Training-Imitation-Learning.md) tutorial
 covers these features in more depth.

@@ -421,6 +422,14 @@ training process.
 the broadcasting feature
 [here](Learning-Environment-Design-Brains.md#using-the-broadcast-feature).

+- **Training with Environment Parameter Sampling** - To train agents to be robust
+  to changes in their environment (i.e., for generalization), an agent should be
+  exposed to a variety of environment variations. Similar to Curriculum Learning,
+  which allows environments to get more difficult as the agent learns, we also
+  provide a way to randomly resample aspects of the environment during training.
+  See [Training with Environment Parameter Sampling](Training-Generalization-Learning.md)
+  to learn more about this feature.
+
 - **Docker Set-up (Experimental)** - To facilitate setting up ML-Agents without
   installing Python or TensorFlow directly, we provide a
   [guide](Using-Docker.md) on how to create and run a Docker container.
```
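Concretely, "resampling aspects of the environment" means varying the reset parameters each time the environment resets. A minimal sketch of the effect from the Python side, assuming the `mlagents.envs` low-level API; the build name, parameter names, and ranges below are illustrative, not taken from this commit:

```python
import random

from mlagents.envs import UnityEnvironment

# Illustrative only: "3DBall" stands in for a real environment build, and
# the reset parameters/ranges stand in for whatever a sampler file defines.
env = UnityEnvironment(file_name="3DBall")

for episode in range(10):
    # Draw a fresh environment variation, as the Sampler Manager would.
    config = {
        "gravity": random.uniform(4.0, 12.0),
        "scale": random.uniform(0.5, 4.0),
    }
    env.reset(config=config, train_mode=True)
    # ... run the episode against the varied environment ...

env.close()
```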

docs/Training-Generalization-Learning.md

Lines changed: 42 additions & 9 deletions

````diff
@@ -18,8 +18,9 @@ Ball scale of 0.5 | Ball scale of 4
 _Variations of the 3D Ball environment._

 To vary environments, we first decide what parameters to vary in an
-environment. These parameters are known as `Reset Parameters`. In the 3D ball
-environment example displayed in the figure above, the reset parameters are `gravity`, `ball_mass` and `ball_scale`.
+environment. We call these parameters `Reset Parameters`. In the 3D ball
+environment example displayed in the figure above, the reset parameters are
+`gravity`, `ball_mass` and `ball_scale`.


 ## How-to
@@ -31,17 +32,17 @@ can be done either deterministically or randomly.
 This is done by assigning each reset parameter a sampler, which samples a reset
 parameter value (such as a uniform sampler). If a sampler isn't provided for a
 reset parameter, the parameter maintains the default value throughout the
-training, remaining unchanged. The samplers for all the reset parameters are
-handled by a **Sampler Manager**, which also handles the generation of new
+training procedure, remaining unchanged. The samplers for all the reset parameters
+are handled by a **Sampler Manager**, which also handles the generation of new
 values for the reset parameters when needed.

 To setup the Sampler Manager, we setup a YAML file that specifies how we wish to
 generate new samples. In this file, we specify the samplers and the
-`resampling-duration` (number of simulation steps after which reset parameters are
+`resampling-interval` (number of simulation steps after which reset parameters are
 resampled). Below is an example of a sampler file for the 3D ball environment.

 ```yaml
-episode-length: 5000
+resampling-interval: 5000

 mass:
     sampler-type: "uniform"
@@ -59,7 +60,7 @@ scale:

 ```

-* `resampling-duration` (int) - Specifies the number of steps for agent to
+* `resampling-interval` (int) - Specifies the number of steps for the agent to
   train under a particular environment configuration before resetting the
   environment with a new sample of reset parameters.

@@ -77,8 +78,40 @@ environment, then this specification will be ignored.
 key under the `multirange_uniform` sampler for the gravity reset parameter.
 The key name should match the name of the corresponding argument in the sampler definition. (Look at defining a new sampler method)

+
 The sampler manager allocates a sampler for a reset parameter by using the *Sampler Factory*, which maintains a dictionary mapping of string keys to sampler objects. The samplers available for reset parameter resampling are those registered in the Sampler Factory.

+#### Possible Sampler Types
+
+The currently implemented samplers that can be used with the `sampler-type` arguments are:
+
+* `uniform` - Uniform sampler
+    * Uniformly samples a single float value between defined endpoints.
+      The sub-arguments for this sampler to specify the interval
+      endpoints are as below. The sampling is done in the range of
+      [`min_value`, `max_value`).
+
+    * **sub-arguments** - `min_value`, `max_value`
+
+* `gaussian` - Gaussian sampler
+    * Samples a single float value from the distribution characterized by
+      the mean and standard deviation. The sub-arguments to specify the
+      Gaussian distribution to use are as below.
+
+    * **sub-arguments** - `mean`, `st_dev`
+
+* `multirange_uniform` - Multirange Uniform sampler
+    * Uniformly samples a single float value from the specified intervals.
+      Samples by first performing a weighted pick of an interval from the
+      list of intervals (weighted based on interval width) and then samples
+      uniformly from the selected interval (half-closed interval, same as
+      the uniform sampler). This sampler can take an arbitrary number of
+      intervals in a list in the following format:
+      [[`interval_1_min`, `interval_1_max`], [`interval_2_min`, `interval_2_max`], ...]
+
+    * **sub-arguments** - `intervals`
+
+
 The implementation of the samplers can be found at `ml-agents-envs/mlagents/envs/sampler_class.py`.

 ### Defining a new sampler method
````
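The weighted-interval behavior of `multirange_uniform` described above is the least obvious of the three samplers. A minimal sketch of the idea in plain Python, illustrative only and not the toolkit's actual `sampler_class.py` implementation:

```python
import random


def multirange_uniform(intervals):
    """Sample one float from a list of [min, max] intervals.

    An interval is first chosen with probability proportional to its
    width, then a value is drawn uniformly from that interval.
    """
    widths = [high - low for low, high in intervals]
    low, high = random.choices(intervals, weights=widths, k=1)[0]
    return random.uniform(low, high)


# For example, a value drawn from [7, 10) or [15, 20), with the wider
# interval selected proportionally more often:
print(multirange_uniform([[7.0, 10.0], [15.0, 20.0]]))
```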
````diff
@@ -115,10 +148,10 @@ With the sampler file setup, we can proceed to train our agent as explained in t

 ### Training with Generalization Learning

-We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specify our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/generalization-test.yaml` sampling setup, we can run
+We first begin with setting up the sampler file. After the sampler file is defined and configured, we proceed by launching `mlagents-learn` and specifying our configured sampler file with the `--sampler` flag. To demonstrate, if we wanted to train a 3D ball agent with generalization using the `config/3dball_generalize.yaml` sampling setup, we can run

 ```sh
-mlagents-learn config/trainer_config.yaml --sampler=config/generalize_test.yaml --run-id=3D-Ball-generalization --train
+mlagents-learn config/trainer_config.yaml --sampler=config/3dball_generalize.yaml --run-id=3D-Ball-generalization --train
 ```

 We can observe progress and metrics via TensorBoard.
````
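For completeness, a hedged sketch of what a custom sampler for the Sampler Factory might look like; the class shape, method name, and registration key are assumptions for illustration, not the toolkit's confirmed API (consult `ml-agents-envs/mlagents/envs/sampler_class.py` for the real interface):

```python
import random


class TriangularSampler:
    """Hypothetical custom sampler drawing from a triangular distribution.

    The real base class and factory-registration hook live in
    mlagents/envs/sampler_class.py and may differ from this sketch.
    """

    def __init__(self, low, high, mode):
        self.low = low
        self.high = high
        self.mode = mode

    def sample_parameter(self):
        # Produce one new value for the mapped reset parameter; the
        # Sampler Manager would call this every resampling-interval.
        return random.triangular(self.low, self.high, self.mode)
```

Once registered under a string key, such a sampler would be referenced from the sampler YAML just like the built-ins, e.g. `sampler-type: "triangular"` with `low`, `high` and `mode` as sub-arguments.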

docs/Training-ML-Agents.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -196,7 +196,7 @@ are conducting, see:
 * [Training with PPO](Training-PPO.md)
 * [Using Recurrent Neural Networks](Feature-Memory.md)
 * [Training with Curriculum Learning](Training-Curriculum-Learning.md)
-* [Training with Generalization](Training-Generalization-Learning.md)
+* [Training with Environment Parameter Sampling](Training-Generalization-Learning.md)
 * [Training with Imitation Learning](Training-Imitation-Learning.md)

 You can also compare the
```
