
Commit 73199e8

Fix for divide-by-zero error with Discrete Actions (#1520)
* Enable buffer padding to be set to a value other than 0

Allows the padding in an AgentBufferField to be set to a custom value. In particular, 0-padding for `action_masks` causes a divide-by-zero error; they should be padded with 1's instead. The pad value is passed as a parameter to the `append` method, so it can be set right after the instantiation of an AgentBufferField.
1 parent cb0bfa0 commit 73199e8
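
Why zero padding breaks here (an illustrative sketch of a common masking scheme; the exact ml-agents graph may differ): when illegal actions are masked by multiplying the action probabilities with the mask and renormalising, an all-zero mask makes the normalising sum zero. Padding stored `action_masks` with 1's keeps that sum positive for the padded timesteps. The `masked_probs` helper below is hypothetical, written only for this illustration.

import numpy as np

def masked_probs(logits, mask):
    # Common discrete-action masking: zero out illegal actions, then renormalise.
    probs = np.exp(logits - logits.max())
    probs = probs * mask
    return probs / probs.sum()  # the sum is 0 when the mask is all zeros

logits = np.array([0.2, 1.3, -0.5])
print(masked_probs(logits, np.ones(3)))   # mask padded with 1's: a valid distribution
print(masked_probs(logits, np.zeros(3)))  # all-zero mask: 0/0 -> NaNs (the divide-by-zero)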

2 files changed: 18 additions and 3 deletions

ml-agents/mlagents/trainers/buffer.py

Lines changed: 17 additions & 2 deletions
@@ -28,12 +28,27 @@ class AgentBufferField(list):
             AgentBufferField with the append method.
             """

+            def __init__(self):
+                self.padding_value = 0
+                super(Buffer.AgentBuffer.AgentBufferField, self).__init__()
+
             def __str__(self):
                 return str(np.array(self).shape)

+            def append(self, element, padding_value=0):
+                """
+                Adds an element to this list. Also lets you change the padding
+                type, so that it can be set on append (e.g. action_masks should
+                be padded with 1.)
+                :param element: The element to append to the list.
+                :param padding_value: The value used to pad when get_batch is called.
+                """
+                super(Buffer.AgentBuffer.AgentBufferField, self).append(element)
+                self.padding_value = padding_value
+
             def extend(self, data):
                 """
-                Ads a list of np.arrays to the end of the list of np.arrays.
+                Adds a list of np.arrays to the end of the list of np.arrays.
                 :param data: The np.array list to append.
                 """
                 self += list(np.array(data))
@@ -99,7 +114,7 @@ def get_batch(self, batch_size=None, training_length=1, sequential=True):
                     raise BufferException("The batch size and training length requested for get_batch where"
                                           " too large given the current number of data points.")
                 tmp_list = []
-                padding = np.array(self[-1]) * 0
+                padding = np.array(self[-1]) * self.padding_value
                 # The padding is made with zeros and its shape is given by the shape of the last element
                 for end in range(len(self), len(self) % training_length, -training_length)[:batch_size]:
                     tmp_list += [np.array(self[end - training_length:end])]
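
A minimal runnable sketch of the behaviour the patch above adds. `PaddedField` is a hypothetical toy stand-in for AgentBufferField, not the ml-agents class; its `get_batch` only reproduces the padding rule from the diff.

import numpy as np

class PaddedField(list):
    def __init__(self):
        # Default pad value matches the old behaviour (zeros).
        self.padding_value = 0
        super().__init__()

    def append(self, element, padding_value=0):
        # Remember the pad value alongside the data, as in the patched AgentBufferField.
        super().append(element)
        self.padding_value = padding_value

    def get_batch(self, training_length):
        # Same padding rule as the patched buffer.py: shaped like the last element,
        # scaled by padding_value (0 -> all zeros, 1 -> a copy of the last element).
        padding = np.array(self[-1]) * self.padding_value
        n_pad = (-len(self)) % training_length
        return np.array(list(self) + [padding] * n_pad)

masks = PaddedField()
masks.append(np.array([1.0, 0.0, 1.0]), padding_value=1)
batch = masks.get_batch(training_length=3)
print(batch)         # padded rows copy the stored mask, so none of them is all-zero
print(batch.sum(1))  # every row sum > 0, so a masked renormalisation stays finite

Because the pad is built from the last stored element, padding_value=1 reuses a real mask (which normally allows at least one action), while the default of 0 keeps the old all-zeros padding for every other buffer field.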

ml-agents/mlagents/trainers/ppo/trainer.py

Lines changed: 1 addition & 1 deletion
@@ -224,7 +224,7 @@ def add_experiences(self, curr_all_info: AllBrainInfo, next_all_info: AllBrainIn
                         epsilons[idx])
                 else:
                     self.training_buffer[agent_id]['action_mask'].append(
-                        stored_info.action_masks[idx])
+                        stored_info.action_masks[idx], padding_value=1)
                 a_dist = stored_take_action_outputs['log_probs']
                 value = stored_take_action_outputs['value']
                 self.training_buffer[agent_id]['actions'].append(actions[idx])
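
Note that in this diff only the `action_mask` call site passes `padding_value=1`; the other fields appended in this method (such as `actions`) keep the default of 0, so their padding behaviour is unchanged.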
