
Commit 556adad

Authored by andrewcoh, with Chris Elion and surfnerd

Release 6 fix nan (#4343)

* test initialize steps to 100
* use mean of first trajectory to initialize the normalizer
* remove blank line
* update changelog
* cleaned up initialization of variance/mean
* large normalization obs unit test
* add --upgrade to pip to get newer downloader (#4338)
* Fix format of the changelog for validation. (#4340)

Co-authored-by: Chris Elion <[email protected]>
Co-authored-by: Chris Goy <[email protected]>

1 parent 02175a1 commit 556adad

File tree

6 files changed: +148 -15 lines changed


.yamato/com.unity.ml-agents-performance.yml

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@ Run_Mac_Perfomance_Tests{{ editor.version }}:
   variables:
     UNITY_VERSION: {{ editor.version }}
   commands:
-    - python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+    - python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
     - unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
     - curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
     - chmod +x ./utr

com.unity.ml-agents/CHANGELOG.md

Lines changed: 3 additions & 0 deletions

@@ -35,6 +35,9 @@ recursively (for example, by an Agent's CollectObservations method).
 Previously, this would result in an infinite loop and cause the editor to hang.
 (#4226)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
+- The algorithm used to normalize observations was introducing NaNs if the initial observations were too large
+  due to incorrect initialization. The initialization was fixed and is now the observation means from the
+  first trajectory processed. (#4299)

 ## [1.2.0-preview] - 2020-07-15

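To make the changelog entry concrete, here is a minimal NumPy sketch of the new initialization scheme (illustrative only, not the shipped code; the real implementation builds equivalent TensorFlow ops in the diffs below, and the RunningNormalizer name is hypothetical):

import numpy as np

EPSILON = 1e-7  # guards against division by zero

class RunningNormalizer:
    """Running mean/variance estimator seeded from the first trajectory."""

    def initialize(self, first_batch: np.ndarray) -> None:
        # Seed the statistics with the first trajectory's own moments rather
        # than mean=0 / variance=1, which produced NaNs for very large
        # observations (e.g. ~1800 in the Walker repro).
        self.steps = len(first_batch)
        self.mean = first_batch.mean(axis=0)
        # The running variance is stored scaled by the step count, because
        # the normalizer divides it by steps when computing the std.
        self.scaled_variance = (first_batch.var(axis=0) + EPSILON) * self.steps

    def normalize(self, obs: np.ndarray) -> np.ndarray:
        return (obs - self.mean) / np.sqrt(self.scaled_variance / self.steps)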

ml-agents/mlagents/trainers/policy/tf_policy.py

Lines changed: 13 additions & 3 deletions

@@ -83,6 +83,7 @@ def __init__(
         self.assign_ops: List[tf.Operation] = []
         self.update_dict: Dict[str, tf.Tensor] = {}
         self.inference_dict: Dict[str, tf.Tensor] = {}
+        self.first_normalization_update: bool = False

         self.graph = tf.Graph()
         self.sess = tf.Session(
@@ -453,9 +454,15 @@ def update_normalization(self, vector_obs: np.ndarray) -> None:
         :param vector_obs: The vector observations to add to the running estimate of the distribution.
         """
         if self.use_vec_obs and self.normalize:
-            self.sess.run(
-                self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
-            )
+            if self.first_normalization_update:
+                self.sess.run(
+                    self.init_normalization_op, feed_dict={self.vector_in: vector_obs}
+                )
+                self.first_normalization_update = False
+            else:
+                self.sess.run(
+                    self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
+                )

     @property
     def use_vis_obs(self):
@@ -470,6 +477,7 @@ def _initialize_tensorflow_references(self):
         self.normalization_steps: Optional[tf.Variable] = None
         self.running_mean: Optional[tf.Variable] = None
         self.running_variance: Optional[tf.Variable] = None
+        self.init_normalization_op: Optional[tf.Operation] = None
         self.update_normalization_op: Optional[tf.Operation] = None
         self.value: Optional[tf.Tensor] = None
         self.all_log_probs: tf.Tensor = None
@@ -495,8 +503,10 @@ def create_input_placeholders(self):
             self.behavior_spec.observation_shapes
         )
         if self.normalize:
+            self.first_normalization_update = True
             normalization_tensors = ModelUtils.create_normalizer(self.vector_in)
             self.update_normalization_op = normalization_tensors.update_op
+            self.init_normalization_op = normalization_tensors.init_op
             self.normalization_steps = normalization_tensors.steps
             self.running_mean = normalization_tensors.running_mean
             self.running_variance = normalization_tensors.running_variance

ml-agents/mlagents/trainers/tests/mock_brain.py

Lines changed: 3 additions & 0 deletions

@@ -123,6 +123,9 @@ def make_fake_trajectory(
             memory=memory,
         )
         steps_list.append(experience)
+    obs = []
+    for _shape in observation_shapes:
+        obs.append(np.ones(_shape, dtype=np.float32))
     last_experience = AgentExperience(
         obs=obs,
         reward=reward,

ml-agents/mlagents/trainers/tests/test_nn_policy.py

Lines changed: 107 additions & 6 deletions

@@ -22,6 +22,7 @@
 DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
 BUFFER_INIT_SAMPLES = 32
 NUM_AGENTS = 12
+EPSILON = 1e-7


 def create_policy_mock(
@@ -136,11 +137,112 @@ def test_policy_evaluate(rnn, visual, discrete):
     assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)


-def test_normalization():
+def test_large_normalization():
     behavior_spec = mb.setup_test_behavior_specs(
         use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
     )
+    # Taken from Walker seed 3713 which causes NaN without proper initialization
+    large_obs1 = [
+        1800.00036621,
+        1799.96972656,
+        1800.01245117,
+        1800.07214355,
+        1800.02758789,
+        1799.98303223,
+        1799.88647461,
+        1799.89575195,
+        1800.03479004,
+        1800.14025879,
+        1800.17675781,
+        1800.20581055,
+        1800.33740234,
+        1800.36450195,
+        1800.43457031,
+        1800.45544434,
+        1800.44604492,
+        1800.56713867,
+        1800.73901367,
+    ]
+    large_obs2 = [
+        1799.99975586,
+        1799.96679688,
+        1799.92980957,
+        1799.89550781,
+        1799.93774414,
+        1799.95300293,
+        1799.94067383,
+        1799.92993164,
+        1799.84057617,
+        1799.69873047,
+        1799.70605469,
+        1799.82849121,
+        1799.85095215,
+        1799.76977539,
+        1799.78283691,
+        1799.76708984,
+        1799.67163086,
+        1799.59191895,
+        1799.5135498,
+        1799.45556641,
+        1799.3717041,
+    ]
+    policy = TFPolicy(
+        0,
+        behavior_spec,
+        TrainerSettings(network_settings=NetworkSettings(normalize=True)),
+        "testdir",
+        False,
+    )
+    time_horizon = len(large_obs1)
+    trajectory = make_fake_trajectory(
+        length=time_horizon,
+        max_step_complete=True,
+        observation_shapes=[(1,)],
+        action_space=[2],
+    )
+    for i in range(time_horizon):
+        trajectory.steps[i].obs[0] = np.array([large_obs1[i]], dtype=np.float32)
+    trajectory_buffer = trajectory.to_agentbuffer()
+    policy.update_normalization(trajectory_buffer["vector_obs"])

+    # Check that the running mean and variance are correct
+    steps, mean, variance = policy.sess.run(
+        [policy.normalization_steps, policy.running_mean, policy.running_variance]
+    )
+    assert mean[0] == pytest.approx(np.mean(large_obs1, dtype=np.float32), abs=0.01)
+    assert variance[0] / steps == pytest.approx(
+        np.var(large_obs1, dtype=np.float32), abs=0.01
+    )
+
+    time_horizon = len(large_obs2)
+    trajectory = make_fake_trajectory(
+        length=time_horizon,
+        max_step_complete=True,
+        observation_shapes=[(1,)],
+        action_space=[2],
+    )
+    for i in range(time_horizon):
+        trajectory.steps[i].obs[0] = np.array([large_obs2[i]], dtype=np.float32)
+
+    trajectory_buffer = trajectory.to_agentbuffer()
+    policy.update_normalization(trajectory_buffer["vector_obs"])
+
+    steps, mean, variance = policy.sess.run(
+        [policy.normalization_steps, policy.running_mean, policy.running_variance]
+    )
+
+    assert mean[0] == pytest.approx(
+        np.mean(large_obs1 + large_obs2, dtype=np.float32), abs=0.01
+    )
+    assert variance[0] / steps == pytest.approx(
+        np.var(large_obs1 + large_obs2, dtype=np.float32), abs=0.01
+    )
+
+
+def test_normalization():
+    behavior_spec = mb.setup_test_behavior_specs(
+        use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
+    )
     time_horizon = 6
     trajectory = make_fake_trajectory(
         length=time_horizon,
@@ -169,10 +271,9 @@ def test_normalization():

     assert steps == 6
     assert mean[0] == 0.5
-    # Note: variance is divided by number of steps, and initialized to 1 to avoid
-    # divide by 0. The right answer is 0.25
-    assert (variance[0] - 1) / steps == 0.25
-
+    # Note: variance is initialized to the variance of the initial trajectory + EPSILON
+    # (to avoid divide by 0) and multiplied by the number of steps. The correct answer is 0.25
+    assert variance[0] / steps == pytest.approx(0.25, abs=0.01)
     # Make another update, this time with all 1's
     time_horizon = 10
     trajectory = make_fake_trajectory(
@@ -191,7 +292,7 @@ def test_normalization():

     assert steps == 16
     assert mean[0] == 0.8125
-    assert (variance[0] - 1) / steps == pytest.approx(0.152, abs=0.01)
+    assert variance[0] / steps == pytest.approx(0.152, abs=0.01)


 def test_min_visual_size():
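These assertions rely on a property of the batched update: seeding with the first batch's moments and then applying the running update must reproduce the moments of the concatenated observations. A self-contained NumPy check of that property (hypothetical helpers mirroring the TensorFlow ops, not part of the test suite):

import numpy as np

def init_stats(batch):
    # Mirrors the init op: mean/variance of the first batch, variance scaled by steps.
    n = len(batch)
    return n, batch.mean(axis=0), (batch.var(axis=0) + 1e-7) * n

def update_stats(n, mean, scaled_var, batch):
    # Mirrors the update op: batched Welford-style running moments.
    m = len(batch)
    delta_old = batch - mean
    new_mean = mean + delta_old.sum(axis=0) / (n + m)
    new_scaled_var = scaled_var + ((batch - new_mean) * delta_old).sum(axis=0)
    return n + m, new_mean, new_scaled_var

obs1 = np.random.uniform(1799.0, 1801.0, size=(19, 1)).astype(np.float32)
obs2 = np.random.uniform(1799.0, 1801.0, size=(21, 1)).astype(np.float32)

n, mean, scaled_var = init_stats(obs1)
n, mean, scaled_var = update_stats(n, mean, scaled_var, obs2)

both = np.concatenate([obs1, obs2])
assert np.allclose(mean, both.mean(axis=0), atol=0.01)
assert np.allclose(scaled_var / n, both.var(axis=0), atol=0.01)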

ml-agents/mlagents/trainers/tf/models.py

Lines changed: 21 additions & 5 deletions

@@ -21,6 +21,7 @@ class Tensor3DShape(NamedTuple):


 class NormalizerTensors(NamedTuple):
+    init_op: tf.Operation
     update_op: tf.Operation
     steps: tf.Tensor
     running_mean: tf.Tensor
@@ -187,8 +188,8 @@ def create_normalizer(vector_obs: tf.Tensor) -> NormalizerTensors:
         :return: A NormalizerTensors tuple that holds running mean, running variance, number of steps,
         and the update operation.
         """
-
         vec_obs_size = vector_obs.shape[1]
+
         steps = tf.get_variable(
             "normalization_steps",
             [],
@@ -210,11 +211,15 @@ def create_normalizer(vector_obs: tf.Tensor) -> NormalizerTensors:
             dtype=tf.float32,
             initializer=tf.ones_initializer(),
         )
-        update_normalization = ModelUtils.create_normalizer_update(
+        initialize_normalization, update_normalization = ModelUtils.create_normalizer_update(
            vector_obs, steps, running_mean, running_variance
        )
        return NormalizerTensors(
-            update_normalization, steps, running_mean, running_variance
+            initialize_normalization,
+            update_normalization,
+            steps,
+            running_mean,
+            running_variance,
        )

     @staticmethod
@@ -223,7 +228,7 @@ def create_normalizer_update(
         steps: tf.Tensor,
         running_mean: tf.Tensor,
         running_variance: tf.Tensor,
-    ) -> tf.Operation:
+    ) -> Tuple[tf.Operation, tf.Operation]:
         """
         Creates the update operation for the normalizer.
         :param vector_input: Vector observation to use for updating the running mean and variance.
@@ -250,7 +255,18 @@ def create_normalizer_update(
         update_mean = tf.assign(running_mean, new_mean)
         update_variance = tf.assign(running_variance, new_variance)
         update_norm_step = tf.assign(steps, total_new_steps)
-        return tf.group([update_mean, update_variance, update_norm_step])
+        # First mean and variance calculated normally
+        initial_mean, initial_variance = tf.nn.moments(vector_input, axes=[0])
+        initialize_mean = tf.assign(running_mean, initial_mean)
+        # Multiplied by total_new_steps because it is divided by total_new_steps in the normalization
+        initialize_variance = tf.assign(
+            running_variance,
+            (initial_variance + EPSILON) * tf.cast(total_new_steps, dtype=tf.float32),
+        )
+        return (
+            tf.group([initialize_mean, initialize_variance, update_norm_step]),
+            tf.group([update_mean, update_variance, update_norm_step]),
+        )

     @staticmethod
     def create_vector_observation_encoder(
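As a quick sanity check of the scaling convention in create_normalizer_update above (the running variance is stored multiplied by the step count, so dividing by steps recovers the per-feature variance), here is a small NumPy calculation; the alternating 0/1 observations are an assumption chosen to match the mean of 0.5 and variance of 0.25 expected by test_normalization:

import numpy as np

EPSILON = 1e-7
# Six alternating 0/1 observations: mean 0.5, variance 0.25.
obs = np.array([[0.0], [1.0], [0.0], [1.0], [0.0], [1.0]], dtype=np.float32)

steps = obs.shape[0]                     # steps starts at 0, so total_new_steps is the batch size
initial_mean = obs.mean(axis=0)          # what tf.nn.moments computes along axis 0
initial_variance = obs.var(axis=0)
running_variance = (initial_variance + EPSILON) * steps  # stored scaled by steps

print(initial_mean[0])                   # 0.5
print(running_variance[0] / steps)       # ~0.25, as asserted in test_normalization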
