2 changes: 1 addition & 1 deletion dnc/controller.py
@@ -190,7 +190,7 @@ def process_input(self, X, last_read_vectors, state=None):
"""

flat_read_vectors = tf.reshape(last_read_vectors, (-1, self.word_size * self.read_heads))
- complete_input = tf.concat(1, [X, flat_read_vectors])
+ complete_input = tf.concat([X, flat_read_vectors], 1)
nn_output, nn_state = None, None

if self.has_recurrent_nn:
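TensorFlow 1.0 reversed the argument order of tf.concat: the list of tensors now comes first and the axis second. A minimal sketch of the call above, assuming TF 1.x; the placeholder shapes are illustrative, not the repo's actual sizes:

import tensorflow as tf

X = tf.placeholder(tf.float32, [None, 10])                  # controller input, illustrative width
flat_read_vectors = tf.placeholder(tf.float32, [None, 64])  # word_size * read_heads, illustrative

# pre-1.0 form (no longer accepted): tf.concat(1, [X, flat_read_vectors])
complete_input = tf.concat([X, flat_read_vectors], 1)       # concatenate along the feature axis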
2 changes: 1 addition & 1 deletion dnc/dnc.py
@@ -1,5 +1,5 @@
import tensorflow as tf
- from tensorflow.python.ops.rnn_cell import LSTMStateTuple
+ from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple
from memory import Memory
import utility
import os
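The 1.0 reorganization moved LSTMStateTuple out of tensorflow.python.ops.rnn_cell; the contrib path above is where it lives in TF 1.x. A minimal sketch of building a state tuple with it, assuming TF 1.x and illustrative sizes:

import tensorflow as tf
from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple

c = tf.zeros([1, 256])  # cell state (batch_size=1, 256 units), illustrative
h = tf.zeros([1, 256])  # hidden state
state = LSTMStateTuple(c, h)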
16 changes: 8 additions & 8 deletions dnc/memory.py
@@ -74,7 +74,7 @@ def get_lookup_weighting(self, memory_matrix, keys, strengths):
normalized_memory = tf.nn.l2_normalize(memory_matrix, 2)
normalized_keys = tf.nn.l2_normalize(keys, 1)

- similiarity = tf.batch_matmul(normalized_memory, normalized_keys)
+ similiarity = tf.matmul(normalized_memory, normalized_keys)
strengths = tf.expand_dims(strengths, 1)

return tf.nn.softmax(similiarity * strengths, 1)
@@ -131,7 +131,7 @@ def get_allocation_weighting(self, sorted_usage, free_list):
flat_unordered_allocation_weighting
)

- packed_wightings = flat_ordered_weightings.pack()
+ packed_wightings = flat_ordered_weightings.stack()
return tf.reshape(packed_wightings, (self.batch_size, self.words_num))


@@ -188,8 +188,8 @@ def update_memory(self, memory_matrix, write_weighting, write_vector, erase_vector):
write_vector = tf.expand_dims(write_vector, 1)
erase_vector = tf.expand_dims(erase_vector, 1)

- erasing = memory_matrix * (1 - tf.batch_matmul(write_weighting, erase_vector))
- writing = tf.batch_matmul(write_weighting, write_vector)
+ erasing = memory_matrix * (1 - tf.matmul(write_weighting, erase_vector))
+ writing = tf.matmul(write_weighting, write_vector)
updated_memory = erasing + writing

return updated_memory
@@ -239,7 +239,7 @@ def update_link_matrix(self, precedence_vector, link_matrix, write_weighting):
precedence_vector = tf.expand_dims(precedence_vector, 1)

reset_factor = 1 - utility.pairwise_add(write_weighting, is_batch=True)
- updated_link_matrix = reset_factor * link_matrix + tf.batch_matmul(write_weighting, precedence_vector)
+ updated_link_matrix = reset_factor * link_matrix + tf.matmul(write_weighting, precedence_vector)
updated_link_matrix = (1 - self.I) * updated_link_matrix # eliminates self-links

return updated_link_matrix
@@ -262,8 +262,8 @@ def get_directional_weightings(self, read_weightings, link_matrix):
backward weighting: Tensor (batch_size, words_num, read_heads)
"""

- forward_weighting = tf.batch_matmul(link_matrix, read_weightings)
- backward_weighting = tf.batch_matmul(link_matrix, read_weightings, adj_x=True)
+ forward_weighting = tf.matmul(link_matrix, read_weightings)
+ backward_weighting = tf.matmul(link_matrix, read_weightings, adjoint_a=True)

return forward_weighting, backward_weighting

@@ -308,7 +308,7 @@ def update_read_vectors(self, memory_matrix, read_weightings):
Returns: Tensor (word_size, read_heads)
"""

- updated_read_vectors = tf.batch_matmul(memory_matrix, read_weightings, adj_x=True)
+ updated_read_vectors = tf.matmul(memory_matrix, read_weightings, adjoint_a=True)

return updated_read_vectors

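tf.batch_matmul was removed in TF 1.0: tf.matmul now multiplies rank-3 tensors over the leading batch dimension, and the adj_x / adj_y flags became adjoint_a / adjoint_b. A minimal sketch of the read-vector update above, assuming TF 1.x, with the DNC shapes noted as comments (sizes illustrative):

import tensorflow as tf

memory_matrix = tf.placeholder(tf.float32, [None, 16, 64])   # (batch_size, words_num, word_size)
read_weightings = tf.placeholder(tf.float32, [None, 16, 4])  # (batch_size, words_num, read_heads)

# old form: tf.batch_matmul(memory_matrix, read_weightings, adj_x=True)
read_vectors = tf.matmul(memory_matrix, read_weightings, adjoint_a=True)
# result shape: (batch_size, word_size, read_heads)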
8 changes: 4 additions & 4 deletions dnc/utility.py
@@ -34,13 +34,13 @@ def pairwise_add(u, v=None, is_batch=False):
n = u_shape[0] if not is_batch else u_shape[1]

column_u = tf.reshape(u, (-1, 1) if not is_batch else (-1, n, 1))
- U = tf.concat(1 if not is_batch else 2, [column_u] * n)
+ U = tf.concat([column_u] * n, 1 if not is_batch else 2)

if v is u:
return U + tf.transpose(U, None if not is_batch else [0, 2, 1])
else:
row_v = tf.reshape(v, (1, -1) if not is_batch else (-1, 1, n))
- V = tf.concat(0 if not is_batch else 1, [row_v] * n)
+ V = tf.concat([row_v] * n, 0 if not is_batch else 1)

return U + V

@@ -87,7 +87,7 @@ def unpack_into_tensorarray(value, axis, size=None):
array = tf.TensorArray(dtype=dtype, size=array_size)
dim_permutation = [axis] + range(1, axis) + [0] + range(axis + 1, rank)
unpack_axis_major_value = tf.transpose(value, dim_permutation)
- full_array = array.unpack(unpack_axis_major_value)
+ full_array = array.unstack(unpack_axis_major_value)

return full_array

@@ -106,7 +106,7 @@ def pack_into_tensor(array, axis):
the packed tensor
"""

- packed_tensor = array.pack()
+ packed_tensor = array.stack()
shape = packed_tensor.get_shape()
rank = len(shape)

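TensorArray.unpack and .pack were renamed to .unstack and .stack in TF 1.0 with the same behaviour. A minimal sketch, assuming TF 1.x; the placeholder shape is illustrative:

import tensorflow as tf

values = tf.placeholder(tf.float32, [5, 3])       # 5 items to spread over the array
array = tf.TensorArray(dtype=tf.float32, size=5)

array = array.unstack(values)   # previously array.unpack(values)
packed = array.stack()          # previously array.pack(); shape (5, 3)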
3 changes: 2 additions & 1 deletion tasks/babi/recurrent_controller.py
@@ -1,5 +1,6 @@
import numpy as np
import tensorflow as tf
+ from tensorflow.contrib.rnn.python.ops.core_rnn_cell import BasicLSTMCell
from dnc.controller import BaseController

"""
@@ -11,7 +12,7 @@
class RecurrentController(BaseController):

def network_vars(self):
- self.lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256)
+ self.lstm_cell = BasicLSTMCell(256)
self.state = self.lstm_cell.zero_state(self.batch_size, tf.float32)

def network_op(self, X, state):
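BasicLSTMCell likewise moved out of tf.nn.rnn_cell in TF 1.0; the contrib path imported above is one location that exposes it. A minimal sketch of building the cell and its zero state, assuming TF 1.x and an illustrative batch size:

import tensorflow as tf
from tensorflow.contrib.rnn.python.ops.core_rnn_cell import BasicLSTMCell

lstm_cell = BasicLSTMCell(256)
state = lstm_cell.zero_state(1, tf.float32)  # batch_size=1 for illustration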
16 changes: 8 additions & 8 deletions tasks/babi/train.py
@@ -34,8 +34,8 @@ def prepare_sample(sample, target_code, word_space_size):
output_vec[target_mask] = sample[0]['outputs']
weights_vec[target_mask] = 1.0

- input_vec = np.array([onehot(code, word_space_size) for code in input_vec])
- output_vec = np.array([onehot(code, word_space_size) for code in output_vec])
+ input_vec = np.array([onehot(int(code), word_space_size) for code in input_vec])
+ output_vec = np.array([onehot(int(code), word_space_size) for code in output_vec])

return (
np.reshape(input_vec, (1, -1, word_space_size)),
@@ -90,7 +90,7 @@ def prepare_sample(sample, target_code, word_space_size):
llprint("Building Computational Graph ... ")

optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum)
- summerizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)
+ summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph)

ncomputer = DNC(
RecurrentController,
@@ -107,7 +107,7 @@ def prepare_sample(sample, target_code, word_space_size):

loss_weights = tf.placeholder(tf.float32, [batch_size, None, 1])
loss = tf.reduce_mean(
- loss_weights * tf.nn.softmax_cross_entropy_with_logits(output, ncomputer.target_output)
+ loss_weights * tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=ncomputer.target_output)
)

summeries = []
@@ -118,19 +118,19 @@ def prepare_sample(sample, target_code, word_space_size):
gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
for (grad, var) in gradients:
if grad is not None:
- summeries.append(tf.histogram_summary(var.name + '/grad', grad))
+ summeries.append(tf.summary.histogram(var.name + '/grad', grad))

apply_gradients = optimizer.apply_gradients(gradients)

- summeries.append(tf.scalar_summary("Loss", loss))
+ summeries.append(tf.summary.scalar("Loss", loss))

- summerize_op = tf.merge_summary(summeries)
+ summerize_op = tf.summary.merge(summeries)
no_summerize = tf.no_op()

llprint("Done!\n")

llprint("Initializing Variables ... ")
- session.run(tf.initialize_all_variables())
+ session.run(tf.global_variables_initializer())
llprint("Done!\n")

if from_checkpoint is not None:
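Since TF 1.0, tf.nn.softmax_cross_entropy_with_logits only accepts the named logits/labels arguments, and tf.initialize_all_variables() was replaced by tf.global_variables_initializer(). A minimal sketch of the pattern in this script, assuming TF 1.x; vocab_size stands in for word_space_size and the shapes are illustrative:

import tensorflow as tf

vocab_size = 50  # illustrative stand-in for word_space_size
output = tf.placeholder(tf.float32, [1, None, vocab_size])         # logits from the model
target_output = tf.placeholder(tf.float32, [1, None, vocab_size])  # one-hot targets

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=target_output)
)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())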
4 changes: 2 additions & 2 deletions tasks/copy/feedforward_controller.py
@@ -10,7 +10,7 @@
class FeedforwardController(BaseController):

def network_vars(self):
- initial_std = lambda in_nodes: np.min(1e-2, np.sqrt(2.0 / in_nodes))
+ initial_std = lambda in_nodes: np.minimum(1e-2, np.sqrt(2.0 / in_nodes))
input_ = self.nn_input_size

self.W1 = tf.Variable(tf.truncated_normal([input_, 128], stddev=initial_std(input_)), name='layer1_W')
@@ -29,7 +29,7 @@ def network_op(self, X):
return l2_activation

def initials(self):
- initial_std = lambda in_nodes: np.min(1e-2, np.sqrt(2.0 / in_nodes))
+ initial_std = lambda in_nodes: np.minimum(1e-2, np.sqrt(2.0 / in_nodes))

# defining internal weights of the controller
self.interface_weights = tf.Variable(
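The np.min call here was a genuine bug rather than a rename: np.min reduces a single array and interprets its second positional argument as an axis, whereas np.minimum takes the element-wise minimum of two values, which is what the standard-deviation cap intends. A quick check:

import numpy as np

in_nodes = 128
initial_std = np.minimum(1e-2, np.sqrt(2.0 / in_nodes))
# np.sqrt(2.0 / 128) == 0.125, so the cap applies and initial_std == 0.01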
10 changes: 5 additions & 5 deletions tasks/copy/train.py
@@ -93,22 +93,22 @@ def binary_cross_entropy(predictions, targets):
gradients = optimizer.compute_gradients(loss)
for i, (grad, var) in enumerate(gradients):
if grad is not None:
- summeries.append(tf.histogram_summary(var.name + '/grad', grad))
+ summeries.append(tf.summary.histogram(var.name + '/grad', grad))
gradients[i] = (tf.clip_by_value(grad, -10, 10), var)

apply_gradients = optimizer.apply_gradients(gradients)

- summeries.append(tf.scalar_summary("Loss", loss))
+ summeries.append(tf.summary.scalar("Loss", loss))

- summerize_op = tf.merge_summary(summeries)
+ summerize_op = tf.summary.merge(summeries)
no_summerize = tf.no_op()

- summerizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)
+ summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph)

llprint("Done!\n")

llprint("Initializing Variables ... ")
- session.run(tf.initialize_all_variables())
+ session.run(tf.global_variables_initializer())
llprint("Done!\n")

if from_checkpoint is not None:
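The summary changes in both training scripts follow the same TF 1.0 consolidation: tf.histogram_summary, tf.scalar_summary, tf.merge_summary, and tf.train.SummaryWriter became tf.summary.histogram, tf.summary.scalar, tf.summary.merge, and tf.summary.FileWriter. A minimal end-to-end sketch, assuming TF 1.x; the loss tensor and log directory are illustrative:

import tensorflow as tf

loss = tf.constant(0.0, name='dummy_loss')     # stand-in for the training loss
summeries = [tf.summary.scalar("Loss", loss)]
summerize_op = tf.summary.merge(summeries)

with tf.Session() as session:
    summerizer = tf.summary.FileWriter('/tmp/tb_logs', session.graph)  # illustrative log dir
    summary_str = session.run(summerize_op)
    summerizer.add_summary(summary_str, 0)      # 0 is the global step for this example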