From 0a0deae5464f8054b4e29d3e18f83a41f6ba4164 Mon Sep 17 00:00:00 2001
From: Daniel Ford
Date: Sun, 23 Jul 2017 16:26:30 -0700
Subject: [PATCH 1/2] Compatibility with recent TensorFlow versions

---
 src/demo.py        |  1 +
 src/env_wrapper.py |  2 +-
 src/model.py       | 26 ++++++++++++++++++++------
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/demo.py b/src/demo.py
index f0150a3..2b69a06 100644
--- a/src/demo.py
+++ b/src/demo.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 from __future__ import print_function
+import go_vncdriver
 import tensorflow as tf
 import gym
 import numpy as np
diff --git a/src/env_wrapper.py b/src/env_wrapper.py
index 6555b25..27b7cb1 100644
--- a/src/env_wrapper.py
+++ b/src/env_wrapper.py
@@ -151,7 +151,7 @@ def _reset(self):
     def _step(self, action):
         obs, reward, done, info = self.env.step(action)
         # print('info:', info)
-        done = info['iteration'] > self.resetCount
+        done = info.get('iteration',-1) > self.resetCount
         reward = float(reward)/self.maxDistance  # note: we do not use this rewards at all.
         if self.tilesEnv:
             return obs, reward, done, info
diff --git a/src/model.py b/src/model.py
index 6f69f9f..db1fd2b 100644
--- a/src/model.py
+++ b/src/model.py
@@ -4,6 +4,8 @@
 import tensorflow.contrib.rnn as rnn
 from constants import constants
 
+# compatibility with various versions of tf
+rnn_cell = rnn.rnn_cell if hasattr(rnn, "rnn_cell") else rnn
 
 def normalized_columns_initializer(std=1.0):
     def _initializer(shape, dtype=None, partition_info=None):
@@ -181,7 +183,7 @@ def __init__(self, ob_space, ac_space, designHead='universe'):
 
         # introduce a "fake" batch dimension of 1 to do LSTM over time dim
         x = tf.expand_dims(x, [0])
-        lstm = rnn.rnn_cell.BasicLSTMCell(size, state_is_tuple=True)
+        lstm = rnn_cell.BasicLSTMCell(size, state_is_tuple=True)
         self.state_size = lstm.state_size
         step_size = tf.shape(self.x)[:1]
 
@@ -192,7 +194,7 @@ def __init__(self, ob_space, ac_space, designHead='universe'):
         h_in = tf.placeholder(tf.float32, [1, lstm.state_size.h], name='h_in')
         self.state_in = [c_in, h_in]
 
-        state_in = rnn.rnn_cell.LSTMStateTuple(c_in, h_in)
+        state_in = rnn_cell.LSTMStateTuple(c_in, h_in)
         lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
             lstm, x, initial_state=state_in, sequence_length=step_size,
             time_major=False)
@@ -266,17 +268,26 @@ def __init__(self, ob_space, ac_space, designHead='universe'):
             phi2 = universeHead(phi2)
 
         # inverse model: g(phi1,phi2) -> a_inv: [None, ac_space]
-        g = tf.concat(1,[phi1, phi2])
+        try:
+            # old TF: https://stackoverflow.com/questions/41813665/tensorflow-slim-typeerror-expected-int32-got-list-containing-tensors-of-type
+            g = tf.concat(1,[phi1, phi2])
+        except TypeError:
+            # new TF
+            g = tf.concat(axis=1, values=[phi1, phi2])
+
         g = tf.nn.relu(linear(g, size, "g1", normalized_columns_initializer(0.01)))
         aindex = tf.argmax(asample, axis=1)  # aindex: [batch_size,]
         logits = linear(g, ac_space, "glast", normalized_columns_initializer(0.01))
         self.invloss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
-            logits, aindex), name="invloss")
+            logits=logits, labels=aindex), name="invloss")
         self.ainvprobs = tf.nn.softmax(logits, dim=-1)
 
         # forward model: f(phi1,asample) -> phi2
         # Note: no backprop to asample of policy: it is treated as fixed for predictor training
-        f = tf.concat(1, [phi1, asample])
+        try:
+            f = tf.concat(1, [phi1, asample])
+        except TypeError:
+            f = tf.concat(axis=1, values=[phi1, asample])
         f = tf.nn.relu(linear(f, size, "f1", normalized_columns_initializer(0.01)))
         f = linear(f, phi1.get_shape()[1].value, "flast", normalized_columns_initializer(0.01))
         self.forwardloss = 0.5 * tf.reduce_mean(tf.square(tf.subtract(f, phi2)), name='forwardloss')
@@ -345,7 +356,10 @@ def __init__(self, ob_space, ac_space, designHead='universe', unsupType='state')
 
         # forward model: f(phi1,asample) -> phi2
         # Note: no backprop to asample of policy: it is treated as fixed for predictor training
-        f = tf.concat(1, [phi1, asample])
+        try:
+            f = tf.concat(1, [phi1, asample])
+        except TypeError:
+            f = tf.concat(axis=1, values=[phi1, asample])
         f = tf.nn.relu(linear(f, phi1.get_shape()[1].value, "f1", normalized_columns_initializer(0.01)))
         if 'tile' in designHead:
             f = inverseUniverseHead(f, input_shape, nConvs=2)

From f831bbe59c676c129d65acaeb8493632e8cf7548 Mon Sep 17 00:00:00 2001
From: Daniel Ford
Date: Sun, 23 Jul 2017 16:38:15 -0700
Subject: [PATCH 2/2] Also use either call form for
 sparse_softmax_cross_entropy_with_logits

---
 src/model.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/model.py b/src/model.py
index db1fd2b..b1d462e 100644
--- a/src/model.py
+++ b/src/model.py
@@ -278,8 +278,12 @@ def __init__(self, ob_space, ac_space, designHead='universe'):
         g = tf.nn.relu(linear(g, size, "g1", normalized_columns_initializer(0.01)))
         aindex = tf.argmax(asample, axis=1)  # aindex: [batch_size,]
         logits = linear(g, ac_space, "glast", normalized_columns_initializer(0.01))
-        self.invloss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
-            logits=logits, labels=aindex), name="invloss")
+        try:
+            self.invloss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
+                logits, aindex), name="invloss")
+        except ValueError:
+            self.invloss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
+                logits=logits, labels=aindex), name="invloss")
         self.ainvprobs = tf.nn.softmax(logits, dim=-1)
 
         # forward model: f(phi1,asample) -> phi2
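
Note on the pattern both patches rely on: attempt the pre-1.0 TensorFlow call
form first and fall back on the exception the 1.0 API raises (tf.concat raises
TypeError once its arguments swapped to (values, axis); the positional call to
sparse_softmax_cross_entropy_with_logits raises ValueError under the
keyword-only 1.0 signature), plus a one-time hasattr() probe for the relocated
rnn_cell module. A minimal standalone sketch of the same idiom follows; the
helper names concat_compat and sparse_xent_compat are illustrative only and
are not part of the patch:

    import tensorflow as tf
    import tensorflow.contrib.rnn as rnn

    # One-time module probe, as in the patch: older tf.contrib.rnn nests the
    # cell classes under a rnn_cell submodule, newer versions expose them
    # directly on the module itself.
    rnn_cell = rnn.rnn_cell if hasattr(rnn, "rnn_cell") else rnn

    def concat_compat(values, axis):
        # TF < 1.0: tf.concat(concat_dim, values); TF >= 1.0: tf.concat(values, axis)
        try:
            return tf.concat(axis, values)   # old-style call succeeds on old TF
        except TypeError:
            # new TF rejects the tensor list where an integer axis belongs
            return tf.concat(values, axis)

    def sparse_xent_compat(logits, labels):
        # TF >= 1.0 accepts only named arguments and raises ValueError otherwise
        try:
            return tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
        except ValueError:
            return tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels)

Because the probes run at graph-construction time, the cost is one failed call
per site, and the same source then builds on either API generation.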