Skip to content

Commit 5aca9a6

Browse files
authored
Fix for visual observation w/ curiosity (#873)
1 parent b191a88 commit 5aca9a6

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

python/unitytrainers/ppo/models.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,8 +137,9 @@ def create_forward_model(self, encoded_state, encoded_next_state):
137 137
"""
138 138
combined_input = tf.concat([encoded_state, self.selected_actions], axis=1)
139 139
hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
140-
# We compare against the concatenation of all observation streams, hence `self.v_size+1`.
141-
pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.v_size+1), activation=None)
140+
# We compare against the concatenation of all observation streams, hence `self.v_size + int(self.o_size > 0)`.
141+
pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.v_size + int(self.o_size > 0)),
142+
activation=None)
142 143

143 144
squared_difference = 0.5 * tf.reduce_sum(tf.squared_difference(pred_next_state, encoded_next_state), axis=1)
144 145
self.intrinsic_reward = tf.clip_by_value(self.curiosity_strength * squared_difference, 0, 1)

0 commit comments

Comments
 (0)