diff --git a/dnc/controller.py b/dnc/controller.py
index dbb5a1ea..d4c62f79 100644
--- a/dnc/controller.py
+++ b/dnc/controller.py
@@ -190,7 +190,7 @@ def process_input(self, X, last_read_vectors, state=None):
         """

         flat_read_vectors = tf.reshape(last_read_vectors, (-1, self.word_size * self.read_heads))
-        complete_input = tf.concat(1, [X, flat_read_vectors])
+        complete_input = tf.concat([X, flat_read_vectors], 1)
         nn_output, nn_state = None, None

         if self.has_recurrent_nn:
diff --git a/dnc/dnc.py b/dnc/dnc.py
index 13d0ecfc..fe240830 100644
--- a/dnc/dnc.py
+++ b/dnc/dnc.py
@@ -1,5 +1,5 @@
 import tensorflow as tf
-from tensorflow.python.ops.rnn_cell import LSTMStateTuple
+from tensorflow.contrib.rnn.python.ops.core_rnn_cell import LSTMStateTuple
 from memory import Memory
 import utility
 import os
diff --git a/dnc/memory.py b/dnc/memory.py
index fa700987..4bc335a5 100644
--- a/dnc/memory.py
+++ b/dnc/memory.py
@@ -74,7 +74,7 @@ def get_lookup_weighting(self, memory_matrix, keys, strengths):

         normalized_memory = tf.nn.l2_normalize(memory_matrix, 2)
         normalized_keys = tf.nn.l2_normalize(keys, 1)
-        similiarity = tf.batch_matmul(normalized_memory, normalized_keys)
+        similiarity = tf.matmul(normalized_memory, normalized_keys)
         strengths = tf.expand_dims(strengths, 1)

         return tf.nn.softmax(similiarity * strengths, 1)
@@ -131,7 +131,7 @@ def get_allocation_weighting(self, sorted_usage, free_list):
             flat_unordered_allocation_weighting
         )

-        packed_wightings = flat_ordered_weightings.pack()
+        packed_wightings = flat_ordered_weightings.stack()

         return tf.reshape(packed_wightings, (self.batch_size, self.words_num))
@@ -188,8 +188,8 @@ def update_memory(self, memory_matrix, write_weighting, write_vector, erase_vect
         write_vector = tf.expand_dims(write_vector, 1)
         erase_vector = tf.expand_dims(erase_vector, 1)

-        erasing = memory_matrix * (1 - tf.batch_matmul(write_weighting, erase_vector))
-        writing = tf.batch_matmul(write_weighting, write_vector)
+        erasing = memory_matrix * (1 - tf.matmul(write_weighting, erase_vector))
+        writing = tf.matmul(write_weighting, write_vector)
         updated_memory = erasing + writing

         return updated_memory
@@ -239,7 +239,7 @@ def update_link_matrix(self, precedence_vector, link_matrix, write_weighting):
         precedence_vector = tf.expand_dims(precedence_vector, 1)

         reset_factor = 1 - utility.pairwise_add(write_weighting, is_batch=True)
-        updated_link_matrix = reset_factor * link_matrix + tf.batch_matmul(write_weighting, precedence_vector)
+        updated_link_matrix = reset_factor * link_matrix + tf.matmul(write_weighting, precedence_vector)
         updated_link_matrix = (1 - self.I) * updated_link_matrix  # eliminates self-links

         return updated_link_matrix
@@ -262,8 +262,8 @@ def get_directional_weightings(self, read_weightings, link_matrix):
             backward weighting: Tensor (batch_size, words_num, read_heads)
         """

-        forward_weighting = tf.batch_matmul(link_matrix, read_weightings)
-        backward_weighting = tf.batch_matmul(link_matrix, read_weightings, adj_x=True)
+        forward_weighting = tf.matmul(link_matrix, read_weightings)
+        backward_weighting = tf.matmul(link_matrix, read_weightings, adjoint_a=True)

         return forward_weighting, backward_weighting
@@ -308,7 +308,7 @@ def update_read_vectors(self, memory_matrix, read_weightings):
         Returns: Tensor (word_size, read_heads)
         """

-        updated_read_vectors = tf.batch_matmul(memory_matrix, read_weightings, adj_x=True)
+        updated_read_vectors = tf.matmul(memory_matrix, read_weightings, adjoint_a=True)

         return updated_read_vectors
diff --git a/dnc/utility.py b/dnc/utility.py
index 8e852ca1..4e090436 100644
--- a/dnc/utility.py
+++ b/dnc/utility.py
@@ -34,13 +34,13 @@ def pairwise_add(u, v=None, is_batch=False):
     n = u_shape[0] if not is_batch else u_shape[1]

     column_u = tf.reshape(u, (-1, 1) if not is_batch else (-1, n, 1))
-    U = tf.concat(1 if not is_batch else 2, [column_u] * n)
+    U = tf.concat([column_u] * n, 1 if not is_batch else 2)

     if v is u:
         return U + tf.transpose(U, None if not is_batch else [0, 2, 1])
     else:
         row_v = tf.reshape(v, (1, -1) if not is_batch else (-1, 1, n))
-        V = tf.concat(0 if not is_batch else 1, [row_v] * n)
+        V = tf.concat([row_v] * n, 0 if not is_batch else 1)

         return U + V
@@ -87,7 +87,7 @@ def unpack_into_tensorarray(value, axis, size=None):
     array = tf.TensorArray(dtype=dtype, size=array_size)
     dim_permutation = [axis] + range(1, axis) + [0] + range(axis + 1, rank)
     unpack_axis_major_value = tf.transpose(value, dim_permutation)
-    full_array = array.unpack(unpack_axis_major_value)
+    full_array = array.unstack(unpack_axis_major_value)

     return full_array
@@ -106,7 +106,7 @@ def pack_into_tensor(array, axis):
         the packed tensor
     """

-    packed_tensor = array.pack()
+    packed_tensor = array.stack()
     shape = packed_tensor.get_shape()
     rank = len(shape)
diff --git a/tasks/babi/recurrent_controller.py b/tasks/babi/recurrent_controller.py
index de480cb3..7a38b63c 100644
--- a/tasks/babi/recurrent_controller.py
+++ b/tasks/babi/recurrent_controller.py
@@ -1,5 +1,6 @@
 import numpy as np
 import tensorflow as tf
+from tensorflow.contrib.rnn.python.ops.core_rnn_cell import BasicLSTMCell
 from dnc.controller import BaseController

 """
@@ -11,7 +12,7 @@ class RecurrentController(BaseController):

     def network_vars(self):
-        self.lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256)
+        self.lstm_cell = BasicLSTMCell(256)
         self.state = self.lstm_cell.zero_state(self.batch_size, tf.float32)

     def network_op(self, X, state):
diff --git a/tasks/babi/train.py b/tasks/babi/train.py
index 3eaff15f..5ed421ce 100644
--- a/tasks/babi/train.py
+++ b/tasks/babi/train.py
@@ -34,8 +34,8 @@ def prepare_sample(sample, target_code, word_space_size):
     output_vec[target_mask] = sample[0]['outputs']
     weights_vec[target_mask] = 1.0

-    input_vec = np.array([onehot(code, word_space_size) for code in input_vec])
-    output_vec = np.array([onehot(code, word_space_size) for code in output_vec])
+    input_vec = np.array([onehot(int(code), word_space_size) for code in input_vec])
+    output_vec = np.array([onehot(int(code), word_space_size) for code in output_vec])

     return (
         np.reshape(input_vec, (1, -1, word_space_size)),
@@ -90,7 +90,7 @@ def prepare_sample(sample, target_code, word_space_size):
             llprint("Building Computational Graph ... ")

             optimizer = tf.train.RMSPropOptimizer(learning_rate, momentum=momentum)
-            summerizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)
+            summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph)

             ncomputer = DNC(
                 RecurrentController,
@@ -107,7 +107,7 @@ def prepare_sample(sample, target_code, word_space_size):
             loss_weights = tf.placeholder(tf.float32, [batch_size, None, 1])

             loss = tf.reduce_mean(
-                loss_weights * tf.nn.softmax_cross_entropy_with_logits(output, ncomputer.target_output)
+                loss_weights * tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=ncomputer.target_output)
             )

             summeries = []
@@ -118,19 +118,19 @@ def prepare_sample(sample, target_code, word_space_size):
                 gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
             for (grad, var) in gradients:
                 if grad is not None:
-                    summeries.append(tf.histogram_summary(var.name + '/grad', grad))
+                    summeries.append(tf.summary.histogram(var.name + '/grad', grad))

             apply_gradients = optimizer.apply_gradients(gradients)

-            summeries.append(tf.scalar_summary("Loss", loss))
+            summeries.append(tf.summary.scalar("Loss", loss))

-            summerize_op = tf.merge_summary(summeries)
+            summerize_op = tf.summary.merge(summeries)
             no_summerize = tf.no_op()

             llprint("Done!\n")
             llprint("Initializing Variables ... ")
-            session.run(tf.initialize_all_variables())
+            session.run(tf.global_variables_initializer())
             llprint("Done!\n")

             if from_checkpoint is not None:
diff --git a/tasks/copy/feedforward_controller.py b/tasks/copy/feedforward_controller.py
index 608daadc..031733c9 100644
--- a/tasks/copy/feedforward_controller.py
+++ b/tasks/copy/feedforward_controller.py
@@ -10,7 +10,7 @@ class FeedforwardController(BaseController):

     def network_vars(self):
-        initial_std = lambda in_nodes: np.min(1e-2, np.sqrt(2.0 / in_nodes))
+        initial_std = lambda in_nodes: np.minimum(1e-2, np.sqrt(2.0 / in_nodes))
         input_ = self.nn_input_size

         self.W1 = tf.Variable(tf.truncated_normal([input_, 128], stddev=initial_std(input_)), name='layer1_W')
@@ -29,7 +29,7 @@ def network_op(self, X):
         return l2_activation

     def initials(self):
-        initial_std = lambda in_nodes: np.min(1e-2, np.sqrt(2.0 / in_nodes))
+        initial_std = lambda in_nodes: np.minimum(1e-2, np.sqrt(2.0 / in_nodes))

         # defining internal weights of the controller
         self.interface_weights = tf.Variable(
diff --git a/tasks/copy/train.py b/tasks/copy/train.py
index 35df35df..de9f50a8 100644
--- a/tasks/copy/train.py
+++ b/tasks/copy/train.py
@@ -93,22 +93,22 @@ def binary_cross_entropy(predictions, targets):
             gradients = optimizer.compute_gradients(loss)
             for i, (grad, var) in enumerate(gradients):
                 if grad is not None:
-                    summeries.append(tf.histogram_summary(var.name + '/grad', grad))
+                    summeries.append(tf.summary.histogram(var.name + '/grad', grad))
                     gradients[i] = (tf.clip_by_value(grad, -10, 10), var)

             apply_gradients = optimizer.apply_gradients(gradients)

-            summeries.append(tf.scalar_summary("Loss", loss))
+            summeries.append(tf.summary.scalar("Loss", loss))

-            summerize_op = tf.merge_summary(summeries)
+            summerize_op = tf.summary.merge(summeries)
             no_summerize = tf.no_op()

-            summerizer = tf.train.SummaryWriter(tb_logs_dir, session.graph)
+            summerizer = tf.summary.FileWriter(tb_logs_dir, session.graph)

             llprint("Done!\n")
             llprint("Initializing Variables ... ")
-            session.run(tf.initialize_all_variables())
+            session.run(tf.global_variables_initializer())
             llprint("Done!\n")

             if from_checkpoint is not None: