diff --git a/dnc/controller.py b/dnc/controller.py
index dbb5a1ea..d4c62f79 100644
--- a/dnc/controller.py
+++ b/dnc/controller.py
@@ -190,7 +190,7 @@ def process_input(self, X, last_read_vectors, state=None):
         """

         flat_read_vectors = tf.reshape(last_read_vectors, (-1, self.word_size * self.read_heads))
-        complete_input = tf.concat(1, [X, flat_read_vectors])
+        complete_input = tf.concat([X, flat_read_vectors], 1)
         nn_output, nn_state = None, None

         if self.has_recurrent_nn:
diff --git a/dnc/dnc.py b/dnc/dnc.py
index 13d0ecfc..fe240830 100644
--- a/dnc/dnc.py
+++ b/dnc/dnc.py
@@ -1,5 +1,5 @@
 import tensorflow as tf
-from tensorflow.python.ops.rnn_cell import LSTMStateTuple
+from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple
 from memory import Memory
 import utility
 import os
diff --git a/dnc/memory.py b/dnc/memory.py
index fa700987..4bc335a5 100644
--- a/dnc/memory.py
+++ b/dnc/memory.py
@@ -74,7 +74,7 @@ def get_lookup_weighting(self, memory_matrix, keys, strengths):

         normalized_memory = tf.nn.l2_normalize(memory_matrix, 2)
         normalized_keys = tf.nn.l2_normalize(keys, 1)
-        similiarity = tf.batch_matmul(normalized_memory, normalized_keys)
+        similiarity = tf.matmul(normalized_memory, normalized_keys)
         strengths = tf.expand_dims(strengths, 1)

         return tf.nn.softmax(similiarity * strengths, 1)
@@ -131,7 +131,7 @@ def get_allocation_weighting(self, sorted_usage, free_list):
             flat_unordered_allocation_weighting
         )

-        packed_wightings = flat_ordered_weightings.pack()
+        packed_wightings = flat_ordered_weightings.stack()

         return tf.reshape(packed_wightings, (self.batch_size, self.words_num))
@@ -188,8 +188,8 @@ def update_memory(self, memory_matrix, write_weighting, write_vector, erase_vect
         write_vector = tf.expand_dims(write_vector, 1)
         erase_vector = tf.expand_dims(erase_vector, 1)

-        erasing = memory_matrix * (1 - tf.batch_matmul(write_weighting, erase_vector))
-        writing = tf.batch_matmul(write_weighting, write_vector)
+        erasing = memory_matrix * (1 - tf.matmul(write_weighting, erase_vector))
+        writing = tf.matmul(write_weighting, write_vector)
         updated_memory = erasing + writing

         return updated_memory
@@ -239,7 +239,7 @@ def update_link_matrix(self, precedence_vector, link_matrix, write_weighting):
         precedence_vector = tf.expand_dims(precedence_vector, 1)

         reset_factor = 1 - utility.pairwise_add(write_weighting, is_batch=True)
-        updated_link_matrix = reset_factor * link_matrix + tf.batch_matmul(write_weighting, precedence_vector)
+        updated_link_matrix = reset_factor * link_matrix + tf.matmul(write_weighting, precedence_vector)
         updated_link_matrix = (1 - self.I) * updated_link_matrix  # eliminates self-links

         return updated_link_matrix
@@ -262,8 +262,8 @@ def get_directional_weightings(self, read_weightings, link_matrix):
             backward weighting: Tensor (batch_size, words_num, read_heads)
         """

-        forward_weighting = tf.batch_matmul(link_matrix, read_weightings)
-        backward_weighting = tf.batch_matmul(link_matrix, read_weightings, adj_x=True)
+        forward_weighting = tf.matmul(link_matrix, read_weightings)
+        backward_weighting = tf.matmul(link_matrix, read_weightings, adjoint_a=True)

         return forward_weighting, backward_weighting
@@ -308,7 +308,7 @@ def update_read_vectors(self, memory_matrix, read_weightings):
         Returns: Tensor (word_size, read_heads)
         """

-        updated_read_vectors = tf.batch_matmul(memory_matrix, read_weightings, adj_x=True)
+        updated_read_vectors = tf.matmul(memory_matrix, read_weightings, adjoint_a=True)

         return updated_read_vectors
diff --git a/dnc/utility.py b/dnc/utility.py
index 8e852ca1..4e090436 100644
--- a/dnc/utility.py
+++ b/dnc/utility.py
@@ -34,13 +34,13 @@ def pairwise_add(u, v=None, is_batch=False):
     n = u_shape[0] if not is_batch else u_shape[1]

     column_u = tf.reshape(u, (-1, 1) if not is_batch else (-1, n, 1))
-    U = tf.concat(1 if not is_batch else 2, [column_u] * n)
+    U = tf.concat([column_u] * n, 1 if not is_batch else 2)

     if v is u:
         return U + tf.transpose(U, None if not is_batch else [0, 2, 1])
     else:
         row_v = tf.reshape(v, (1, -1) if not is_batch else (-1, 1, n))
-        V = tf.concat(0 if not is_batch else 1, [row_v] * n)
+        V = tf.concat([row_v] * n, 0 if not is_batch else 1)

         return U + V
@@ -87,7 +87,7 @@ def unpack_into_tensorarray(value, axis, size=None):
     array = tf.TensorArray(dtype=dtype, size=array_size)
     dim_permutation = [axis] + range(1, axis) + [0] + range(axis + 1, rank)
     unpack_axis_major_value = tf.transpose(value, dim_permutation)
-    full_array = array.unpack(unpack_axis_major_value)
+    full_array = array.unstack(unpack_axis_major_value)

     return full_array
@@ -106,7 +106,7 @@ def pack_into_tensor(array, axis):
         the packed tensor
     """

-    packed_tensor = array.pack()
+    packed_tensor = array.stack()
     shape = packed_tensor.get_shape()
     rank = len(shape)
diff --git a/tasks/babi/recurrent_controller.py b/tasks/babi/recurrent_controller.py
index de480cb3..7a38b63c 100644
--- a/tasks/babi/recurrent_controller.py
+++ b/tasks/babi/recurrent_controller.py
@@ -1,5 +1,6 @@
 import numpy as np
 import tensorflow as tf
+from tensorflow.contrib.rnn.python.ops.core_rnn_cell import BasicLSTMCell
 from dnc.controller import BaseController

 """
@@ -11,7 +12,7 @@ class RecurrentController(BaseController):

     def network_vars(self):
-        self.lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256)
+        self.lstm_cell = BasicLSTMCell(256)
         self.state = self.lstm_cell.zero_state(self.batch_size, tf.float32)

     def network_op(self, X, state):
diff --git a/tasks/babi/train.py b/tasks/babi/train.py
index 3eaff15f..5ed421ce 100644
--- a/tasks/babi/train.py
+++ b/tasks/babi/train.py
@@ -34,8 +34,8 @@ def prepare_sample(sample, target_code, word_space_size):
     output_vec[target_mask] = sample[0]['outputs']
     weights_vec[target_mask] = 1.0

-    input_vec = np.array([onehot(code, word_space_size) for code in input_vec])
-    output_vec = np.array([onehot(code, word_space_size) for code in output_vec])
+    input_vec = np.array([onehot(int(code), word_space_size) for code in input_vec])
+    output_vec = np.array([onehot(int(code), word_space_size) for code in output_vec])

     return (
         np.reshape(input_vec, (1, -1, word_space_size)),
@@ -90,7 +90,7 @@ def prepare_sample(sample, target_code, word_space_size):
             llprint("Building Computational Graph ... ")

             optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum)
-            summerizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)
+            summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph)

             ncomputer = DNC(
                 RecurrentController,
@@ -107,7 +107,7 @@ def prepare_sample(sample, target_code, word_space_size):
             loss_weights = tf.placeholder(tf.float32, [batch_size, None, 1])

             loss = tf.reduce_mean(
-                loss_weights * tf.nn.softmax_cross_entropy_with_logits(output, ncomputer.target_output)
+                loss_weights * tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=ncomputer.target_output)
             )

             summeries = []
@@ -118,19 +118,19 @@ def prepare_sample(sample, target_code, word_space_size):
                 gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
             for (grad, var) in gradients:
                 if grad is not None:
-                    summeries.append(tf.histogram_summary(var.name + '/grad', grad))
+                    summeries.append(tf.summary.histogram(var.name + '/grad', grad))

             apply_gradients = optimizer.apply_gradients(gradients)

-            summeries.append(tf.scalar_summary("Loss", loss))
+            summeries.append(tf.summary.scalar("Loss", loss))

-            summerize_op = tf.merge_summary(summeries)
+            summerize_op = tf.summary.merge(summeries)
             no_summerize = tf.no_op()

             llprint("Done!\n")
             llprint("Initializing Variables ... ")
-            session.run(tf.initialize_all_variables())
+            session.run(tf.global_variables_initializer())
             llprint("Done!\n")

             if from_checkpoint is not None:
diff --git a/tasks/copy/feedforward_controller.py b/tasks/copy/feedforward_controller.py
index 608daadc..031733c9 100644
--- a/tasks/copy/feedforward_controller.py
+++ b/tasks/copy/feedforward_controller.py
@@ -10,7 +10,7 @@ class FeedforwardController(BaseController):

     def network_vars(self):
-        initial_std = lambda in_nodes: np.min(1e-2, np.sqrt(2.0 / in_nodes))
+        initial_std = lambda in_nodes: np.minimum(1e-2, np.sqrt(2.0 / in_nodes))
         input_ = self.nn_input_size

         self.W1 = tf.Variable(tf.truncated_normal([input_, 128], stddev=initial_std(input_)), name='layer1_W')
@@ -29,7 +29,7 @@ def network_op(self, X):
         return l2_activation

     def initials(self):
-        initial_std = lambda in_nodes: np.min(1e-2, np.sqrt(2.0 / in_nodes))
+        initial_std = lambda in_nodes: np.minimum(1e-2, np.sqrt(2.0 / in_nodes))

         # defining internal weights of the controller
         self.interface_weights = tf.Variable(
diff --git a/tasks/copy/train.py b/tasks/copy/train.py
index 35df35df..de9f50a8 100644
--- a/tasks/copy/train.py
+++ b/tasks/copy/train.py
@@ -93,22 +93,22 @@ def binary_cross_entropy(predictions, targets):
             gradients = optimizer.compute_gradients(loss)
             for i, (grad, var) in enumerate(gradients):
                 if grad is not None:
-                    summeries.append(tf.histogram_summary(var.name + '/grad', grad))
+                    summeries.append(tf.summary.histogram(var.name + '/grad', grad))
                     gradients[i] = (tf.clip_by_value(grad, -10, 10), var)

             apply_gradients = optimizer.apply_gradients(gradients)

-            summeries.append(tf.scalar_summary("Loss", loss))
+            summeries.append(tf.summary.scalar("Loss", loss))

-            summerize_op = tf.merge_summary(summeries)
+            summerize_op = tf.summary.merge(summeries)
             no_summerize = tf.no_op()

-            summerizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)
+            summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph)

             llprint("Done!\n")
             llprint("Initializing Variables ... ")
-            session.run(tf.initialize_all_variables())
+            session.run(tf.global_variables_initializer())
             llprint("Done!\n")

             if from_checkpoint is not None: