
Commit 2753bc5

Ruo-Ping Dong and Chris Elion authored

use int64 steps, check for NaN actions (#4607) (#4654)

* use int64 steps
* check for NaN actions

Co-authored-by: Ruo-Ping Dong <[email protected]>
Co-authored-by: Chris Elion <[email protected]>
Parent: 5203cd0 · Commit: 2753bc5

File tree: 9 files changed, +52 −5 lines


com.unity.ml-agents/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -33,6 +33,8 @@ if they are called recursively (for example, if they call `Agent.EndEpisode()`).
 Previously, this would result in an infinite loop and cause the editor to hang. (#4573)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)
+- Changed the tensor type of the step count from int32 to int64 to address the overflow issue when the step
+  count grows past 2^31. Previous TensorFlow checkpoints will become incompatible and cannot be loaded. (#4607)
 
 
 ## [1.5.0-preview] - 2020-10-14
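
To see the overflow this entry guards against, here is a minimal numpy sketch (illustrative only, not part of the commit): an int32 counter wraps once it passes 2^31 − 1 and goes negative, while an int64 counter has headroom up to 2^63 − 1.

```python
import numpy as np

# An int32 step counter wraps once it passes 2**31 - 1 ...
step32 = np.array([2 ** 31 - 1], dtype=np.int32)
step32 += 3
print(step32[0])  # -2147483646: the step count appears to jump backwards

# ... while an int64 counter handles the same increment safely.
step64 = np.array([2 ** 31 - 1], dtype=np.int64)
step64 += 3
print(step64[0])  # 2147483650
```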

ml-agents/mlagents/trainers/policy/policy.py

Lines changed: 10 additions & 0 deletions
@@ -141,6 +141,16 @@ def get_action(
     ) -> ActionInfo:
         raise NotImplementedError
 
+    @staticmethod
+    def check_nan_action(action: Optional[np.ndarray]) -> None:
+        # Fast NaN check on the action
+        # See https://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy for background.
+        if action is not None:
+            d = np.sum(action)
+            has_nan = np.isnan(d)
+            if has_nan:
+                raise RuntimeError("NaN action detected.")
+
     @abstractmethod
     def update_normalization(self, vector_obs: np.ndarray) -> None:
         pass
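
The check relies on NaN propagating through arithmetic: np.sum reduces the whole array in a single pass, and if any element is NaN the sum is NaN, avoiding the boolean mask that np.isnan(action).any() would materialize. A self-contained sketch of the same trick (the has_nan name is illustrative, not from the commit):

```python
import numpy as np

def has_nan(action: np.ndarray) -> bool:
    # NaN propagates through addition, so one reduction over the array
    # is enough: if any element is NaN, the whole sum is NaN.
    return bool(np.isnan(np.sum(action)))

assert not has_nan(np.array([0.1, -0.3, 0.7]))
assert has_nan(np.array([0.1, np.nan, 0.7]))
```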

ml-agents/mlagents/trainers/policy/tf_policy.py

Lines changed: 2 additions & 0 deletions
@@ -270,6 +270,8 @@ def get_action(
         )
 
         self.save_memories(global_agent_ids, run_out.get("memory_out"))
+        self.check_nan_action(run_out.get("action"))
+
         return ActionInfo(
             action=run_out.get("action"),
             value=run_out.get("value"),

ml-agents/mlagents/trainers/policy/torch_policy.py

Lines changed: 1 addition & 0 deletions
@@ -235,6 +235,7 @@ def get_action(
             decision_requests, global_agent_ids
         )  # pylint: disable=assignment-from-no-return
         self.save_memories(global_agent_ids, run_out.get("memory_out"))
+        self.check_nan_action(run_out.get("action"))
         return ActionInfo(
             action=run_out.get("action"),
             value=run_out.get("value"),

ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py

Lines changed: 20 additions & 0 deletions
@@ -265,5 +265,25 @@ def test_min_visual_size():
         enc_func(vis_input, 32, ModelUtils.swish, 1, "test", False)
 
 
+def test_step_overflow():
+    behavior_spec = mb.setup_test_behavior_specs(
+        use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
+    )
+
+    policy = TFPolicy(
+        0,
+        behavior_spec,
+        TrainerSettings(network_settings=NetworkSettings(normalize=True)),
+        create_tf_graph=False,
+    )
+    policy.create_input_placeholders()
+    policy.initialize()
+
+    policy.set_step(2 ** 31 - 1)
+    assert policy.get_current_step() == 2 ** 31 - 1
+    policy.increment_step(3)
+    assert policy.get_current_step() == 2 ** 31 + 2
+
+
 if __name__ == "__main__":
     pytest.main()

ml-agents/mlagents/trainers/tests/torch/test_policy.py

Lines changed: 8 additions & 0 deletions
@@ -145,3 +145,11 @@ def test_sample_actions(rnn, visual, discrete):
 
     if rnn:
         assert memories.shape == (1, 1, policy.m_size)
+
+
+def test_step_overflow():
+    policy = create_policy_mock(TrainerSettings())
+    policy.set_step(2 ** 31 - 1)
+    assert policy.get_current_step() == 2 ** 31 - 1  # step = 2147483647
+    policy.increment_step(3)
+    assert policy.get_current_step() == 2 ** 31 + 2  # step = 2147483650

ml-agents/mlagents/trainers/tf/models.py

Lines changed: 4 additions & 4 deletions
@@ -42,10 +42,10 @@ class ModelUtils:
     def create_global_steps():
         """Creates TF ops to track and increment global training step."""
         global_step = tf.Variable(
-            0, name="global_step", trainable=False, dtype=tf.int32
+            0, name="global_step", trainable=False, dtype=tf.int64
         )
         steps_to_increment = tf.placeholder(
-            shape=[], dtype=tf.int32, name="steps_to_increment"
+            shape=[], dtype=tf.int64, name="steps_to_increment"
         )
         increment_step = tf.assign(global_step, tf.add(global_step, steps_to_increment))
         return global_step, increment_step, steps_to_increment
@@ -195,7 +195,7 @@ def create_normalizer(vector_obs: tf.Tensor) -> NormalizerTensors:
         "normalization_steps",
         [],
         trainable=False,
-        dtype=tf.int32,
+        dtype=tf.int64,
         initializer=tf.zeros_initializer(),
     )
     running_mean = tf.get_variable(
@@ -244,7 +244,7 @@ def create_normalizer_update(
     # Based on Welford's algorithm for running mean and standard deviation, for batch updates. Discussion here:
     # https://stackoverflow.com/questions/56402955/whats-the-formula-for-welfords-algorithm-for-variance-std-with-batch-updates
     steps_increment = tf.shape(vector_input)[0]
-    total_new_steps = tf.add(steps, steps_increment)
+    total_new_steps = tf.add(steps, tf.cast(steps_increment, dtype=tf.int64))
 
     # Compute the incremental update and divide by the number of new steps.
     input_to_old_mean = tf.subtract(vector_input, running_mean)
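
The tf.cast in the last hunk is needed because tf.shape returns int32 while the step counter is now int64, and TF1's tf.add does not mix integer widths. For reference, a plain-numpy sketch of the batch-Welford mean update around that counter (an illustration under those assumptions, not the shipped graph code):

```python
import numpy as np

def update_running_mean(running_mean, steps, batch):
    # Batch form of Welford's running-mean update. The batch size is
    # widened to int64 before it is added to the int64 step counter,
    # mirroring the tf.cast in the diff above.
    steps_increment = np.int64(batch.shape[0])
    total_new_steps = steps + steps_increment
    # Incremental update, divided by the new total number of steps.
    new_mean = running_mean + (batch - running_mean).sum(axis=0) / total_new_steps
    return new_mean, total_new_steps

mean, steps = np.zeros(3), np.int64(0)
mean, steps = update_running_mean(mean, steps, np.ones((4, 3)))
print(mean, steps)  # [1. 1. 1.] 4
```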

ml-agents/mlagents/trainers/tf/tensorflow_to_barracuda.py

Lines changed: 2 additions & 0 deletions
@@ -702,6 +702,8 @@ def get_tensor_data(tensor):
         data = tensor.float_val
     if tensor.int_val:
         data = np.array(tensor.int_val, dtype=float)
+    if tensor.int64_val:
+        data = np.array(tensor.int64_val, dtype=float)
     if tensor.bool_val:
         data = np.array(tensor.bool_val, dtype=float)
     return np.array(data).reshape(dims)

ml-agents/mlagents/trainers/torch/networks.py

Lines changed: 3 additions & 1 deletion
@@ -488,7 +488,9 @@ def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
 class GlobalSteps(nn.Module):
     def __init__(self):
         super().__init__()
-        self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
+        self.__global_step = nn.Parameter(
+            torch.Tensor([0]).to(torch.int64), requires_grad=False
+        )
 
     @property
     def current_step(self):
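
Storing the counter as a non-trainable int64 nn.Parameter keeps it inside state_dict(), so the step count is checkpointed along with the network weights and cannot wrap at 2^31. A minimal sketch of the same pattern (simplified names, not the repo's class):

```python
import torch
from torch import nn

class StepCounter(nn.Module):
    def __init__(self):
        super().__init__()
        # int64 and non-trainable: serialized with state_dict(), never updated
        # by the optimizer, and safe well past 2**31 steps.
        self.steps = nn.Parameter(torch.zeros(1, dtype=torch.int64), requires_grad=False)

    def increment(self, n: int) -> None:
        self.steps.data += n

counter = StepCounter()
counter.increment(2 ** 31 - 1)
counter.increment(3)
print(counter.steps.item())  # 2147483650, no int32 wraparound
```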
