Fix for visual observation w/ curiosity (#873)

awjuliani · web-flow · commit 5aca9a6fc3a7 · 2018-06-15T15:42:52.000-07:00
diff --git a/python/unitytrainers/ppo/models.py b/python/unitytrainers/ppo/models.py
@@ -137,8 +137,9 @@ def create_forward_model(self, encoded_state, encoded_next_state):
         """
         combined_input = tf.concat([encoded_state, self.selected_actions], axis=1)
         hidden = tf.layers.dense(combined_input, 256, activation=self.swish)
-        # We compare against the concatenation of all observation streams, hence `self.v_size+1`.
-        pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.v_size+1), activation=None)
+        # We compare against the concatenation of all observation streams, hence `self.v_size + int(self.o_size > 0)`.
+        pred_next_state = tf.layers.dense(hidden, self.curiosity_enc_size * (self.v_size + int(self.o_size > 0)),
+                                          activation=None)
 
         squared_difference = 0.5 * tf.reduce_sum(tf.squared_difference(pred_next_state, encoded_next_state), axis=1)
         self.intrinsic_reward = tf.clip_by_value(self.curiosity_strength * squared_difference, 0, 1)