
Commit b0e7758

Merge branch 'master' into neural-translation

2 parents: e52003b + a131b40

5 files changed: +185 -56 lines

examples/README.md (2 additions, 1 deletion)
@@ -21,7 +21,8 @@
 
 ## Text classification
 
-* [Text Classification Using Recurrent Neural Networks on Words](text_classification.py) (See also [Simplified Version Using Built-in RNN Model](text_classification_builtin_rnn_model.py))
+* [Text Classification Using Recurrent Neural Networks on Words](text_classification.py)
+  (See also [Simplified Version Using Built-in RNN Model](text_classification_builtin_rnn_model.py) with easy to use built-in parameters)
 * [Text Classification Using Convolutional Neural Networks on Words](text_classification_cnn.py)
 * [Text Classification Using Recurrent Neural Networks on Characters](text_classification_character_rnn.py)
 * [Text Classification Using Convolutional Neural Networks on Characters](text_classification_character_cnn.py)

examples/resnet.py (27 additions, 24 deletions)
@@ -15,6 +15,9 @@
 """
 This example builds deep residual network for mnist data.
 Reference Paper: http://arxiv.org/pdf/1512.03385.pdf
+
+Note that this is still a work-in-progress. Feel free to submit a PR
+to make this better.
 """
 
 import os
@@ -31,32 +34,30 @@
 
 
 def res_net(x, y, activation=tf.nn.relu):
-    """Builds a residual network.
+    """Builds a residual network. Note that if the input tensor is 2D, it must be
+    square in order to be converted to a 4D tensor.
 
     Borrowed structure from here: https://github.com/pkmital/tensorflow_tutorials/blob/master/10_residual_network.py
 
    Args:
        x: Input of the network
        y: Output of the network
        activation: Activation function to apply after each convolution
-    Raises:
-        ValueError
-            If a 2D tensor is not square, it cannot be converted to a
-            4D tensor.
    """
-    LayerBlock = namedtuple(
-        'LayerBlock', ['num_layers', 'num_filters', 'bottleneck_size'])
-    blocks = [LayerBlock(3, 128, 32),
-              LayerBlock(3, 256, 64),
-              LayerBlock(3, 512, 128),
-              LayerBlock(3, 1024, 256)]
-
-    # Input check
+
+    # Configurations for each bottleneck block
+    BottleneckBlock = namedtuple(
+        'BottleneckBlock', ['num_layers', 'num_filters', 'bottleneck_size'])
+    blocks = [BottleneckBlock(3, 128, 32),
+              BottleneckBlock(3, 256, 64),
+              BottleneckBlock(3, 512, 128),
+              BottleneckBlock(3, 1024, 256)]
+
    input_shape = x.get_shape().as_list()
+
+    # Reshape the input into the right shape if it's 2D tensor
    if len(input_shape) == 2:
        ndim = int(sqrt(input_shape[1]))
-        if ndim * ndim != input_shape[1]:
-            raise ValueError('input_shape should be square')
        x = tf.reshape(x, [-1, ndim, ndim, 1])
 
    # First convolution expands to 64 channels
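The renamed BottleneckBlock configs above spell out the 1x1 reduce / 3x3 / 1x1 restore pattern that the comments added in the next hunks describe. As a back-of-the-envelope illustration (not part of the commit; weight counts only, biases and batch norm ignored), here is why that pattern is cheap, using the (num_filters=256, bottleneck_size=64) entry:

```python
# Hypothetical arithmetic sketch: weights in one bottleneck layer vs. a
# plain 3x3 convolution at full depth.
def conv_weights(k, c_in, c_out):
    return k * k * c_in * c_out  # k x k kernel, c_in -> c_out channels

plain = conv_weights(3, 256, 256)           # 589,824 weights
bottleneck = (conv_weights(1, 256, 64)      # 1x1 reduce:  16,384
              + conv_weights(3, 64, 64)     # 3x3 core:    36,864
              + conv_weights(1, 64, 256))   # 1x1 restore: 16,384
print(plain, bottleneck)                    # 589824 vs 69632, roughly 8.5x fewer
```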
@@ -74,11 +75,13 @@ def res_net(x, y, activation=tf.nn.relu):
                              [1, 1], [1, 1, 1, 1],
                              padding='VALID', bias=True)
 
-    # Create resnets for each residual block
+    # Create each bottleneck building block for each layer
    for block_i, block in enumerate(blocks):
        for layer_i in range(block.num_layers):
 
            name = 'block_%d/layer_%d' % (block_i, layer_i)
+
+            # 1x1 convolution responsible for reducing dimension
            with tf.variable_scope(name + '/conv_in'):
                conv = skflow.ops.conv2d(net, block.num_filters,
                                         [1, 1], [1, 1, 1, 1],
@@ -95,6 +98,7 @@ def res_net(x, y, activation=tf.nn.relu):
                                         batch_norm=True,
                                         bias=False)
 
+            # 1x1 convolution responsible for restoring dimension
            with tf.variable_scope(name + '/conv_out'):
                conv = skflow.ops.conv2d(conv, block.num_filters,
                                         [1, 1], [1, 1, 1, 1],
@@ -103,6 +107,8 @@ def res_net(x, y, activation=tf.nn.relu):
                                         batch_norm=True,
                                         bias=False)
 
+            # shortcut connections that turn the network into its counterpart
+            # residual function (identity shortcut)
            net = conv + net
 
    try:
@@ -116,16 +122,13 @@ def res_net(x, y, activation=tf.nn.relu):
    except IndexError:
        pass
 
-
+    net_shape = net.get_shape().as_list()
    net = tf.nn.avg_pool(net,
-                         ksize=[1, net.get_shape().as_list()[1],
-                                net.get_shape().as_list()[2], 1],
+                         ksize=[1, net_shape[1], net_shape[2], 1],
                         strides=[1, 1, 1, 1], padding='VALID')
-    net = tf.reshape(
-        net,
-        [-1, net.get_shape().as_list()[1] *
-         net.get_shape().as_list()[2] *
-         net.get_shape().as_list()[3]])
+
+    net_shape = net.get_shape().as_list()
+    net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]])
 
    return skflow.models.logistic_regression(net, y)
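A hedged usage sketch, not part of this commit: wiring res_net into a generic skflow estimator. It assumes the skflow.TensorFlowEstimator interface of this era (model_fn, n_classes, batch_size, steps, learning_rate) and uses sklearn's 8x8 digits so the flattened 2D input (64 = 8 * 8 features) satisfies the square-input contract the new docstring describes:

```python
from sklearn import datasets, metrics
import skflow

digits = datasets.load_digits()
X, y = digits.data, digits.target   # 64 features per sample: a square 8x8 image

# res_net is the function defined above; skflow calls it as model_fn(X, y).
classifier = skflow.TensorFlowEstimator(
    model_fn=res_net, n_classes=10,
    batch_size=100, steps=100, learning_rate=0.001)
classifier.fit(X, y)
print(metrics.accuracy_score(y, classifier.predict(X)))
```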

skflow/estimators/rnn.py (8 additions, 8 deletions)
@@ -35,7 +35,7 @@ class TensorFlowRNNClassifier(TensorFlowEstimator, ClassifierMixin):
         input_op_fn: Function that will transform the input tensor, such as
                      creating word embeddings, byte list, etc. This takes
                      an argument X for input and returns transformed X.
-        bidirection: Whether this is a bidirectional rnn.
+        bidirectional: boolean, Whether this is a bidirectional rnn.
         sequence_length: If sequence_length is provided, dynamic calculation is performed.
                          This saves computational time when unrolling past max sequence length.
         initial_state: An initial state for the RNN. This must be a tensor of appropriate type
@@ -71,7 +71,7 @@ def exp_decay(global_step):
 
     def __init__(self, rnn_size, n_classes, cell_type='gru', num_layers=1,
                  input_op_fn=null_input_op_fn,
-                 initial_state=None, bidirection=False,
+                 initial_state=None, bidirectional=False,
                  sequence_length=None, tf_master="", batch_size=32,
                  steps=50, optimizer="SGD", learning_rate=0.1,
                  tf_random_seed=42, continue_training=False,
@@ -80,7 +80,7 @@ def __init__(self, rnn_size, n_classes, cell_type='gru', num_layers=1,
         self.rnn_size = rnn_size
         self.cell_type = cell_type
         self.input_op_fn = input_op_fn
-        self.bidirection = bidirection
+        self.bidirectional = bidirectional
         self.num_layers = num_layers
         self.sequence_length = sequence_length
         self.initial_state = initial_state
@@ -97,7 +97,7 @@ def __init__(self, rnn_size, n_classes, cell_type='gru', num_layers=1,
     def _model_fn(self, X, y):
         return models.get_rnn_model(self.rnn_size, self.cell_type,
                                     self.num_layers,
-                                    self.input_op_fn, self.bidirection,
+                                    self.input_op_fn, self.bidirectional,
                                     models.logistic_regression,
                                     self.sequence_length,
                                     self.initial_state)(X, y)
@@ -123,7 +123,7 @@ class TensorFlowRNNRegressor(TensorFlowEstimator, RegressorMixin):
         input_op_fn: Function that will transform the input tensor, such as
                      creating word embeddings, byte list, etc. This takes
                      an argument X for input and returns transformed X.
-        bidirection: Whether this is a bidirectional rnn.
+        bidirectional: boolean, Whether this is a bidirectional rnn.
         sequence_length: If sequence_length is provided, dynamic calculation is performed.
                          This saves computational time when unrolling past max sequence length.
         initial_state: An initial state for the RNN. This must be a tensor of appropriate type
@@ -152,7 +152,7 @@ def exp_decay(global_step):
 
     def __init__(self, rnn_size, cell_type='gru', num_layers=1,
                  input_op_fn=null_input_op_fn, initial_state=None,
-                 bidirection=False, sequence_length=None,
+                 bidirectional=False, sequence_length=None,
                  n_classes=0, tf_master="", batch_size=32,
                  steps=50, optimizer="SGD", learning_rate=0.1,
                  tf_random_seed=42, continue_training=False,
@@ -161,7 +161,7 @@ def __init__(self, rnn_size, cell_type='gru', num_layers=1,
         self.rnn_size = rnn_size
         self.cell_type = cell_type
         self.input_op_fn = input_op_fn
-        self.bidirection = bidirection
+        self.bidirectional = bidirectional
         self.num_layers = num_layers
         self.sequence_length = sequence_length
         self.initial_state = initial_state
@@ -178,7 +178,7 @@ def __init__(self, rnn_size, cell_type='gru', num_layers=1,
     def _model_fn(self, X, y):
         return models.get_rnn_model(self.rnn_size, self.cell_type,
                                     self.num_layers,
-                                    self.input_op_fn, self.bidirection,
+                                    self.input_op_fn, self.bidirectional,
                                     models.linear_regression,
                                     self.sequence_length,
                                     self.initial_state)(X, y)
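A hedged sketch of the renamed argument in use, not part of the commit. The input_op_fn follows the pattern of the repo's text classification examples (skflow.ops.categorical_variable plus skflow.ops.split_squeeze, signatures assumed); vocabulary size, document length, and embedding size are made-up values. Note that the updated models.py docstring says sequence_length should be supplied for bidirectional RNNs; it is omitted here only for brevity:

```python
import skflow

MAX_DOCUMENT_LENGTH = 10   # illustrative values, not from the commit
N_WORDS = 256
EMBEDDING_SIZE = 50

def input_op_fn(X):
    # Embed integer word ids, then split the [batch, time, embed] tensor
    # into the time-major list of [batch, embed] tensors the RNN expects.
    word_vectors = skflow.ops.categorical_variable(
        X, n_classes=N_WORDS, embedding_size=EMBEDDING_SIZE, name='words')
    return skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)

classifier = skflow.TensorFlowRNNClassifier(
    rnn_size=32, n_classes=2, cell_type='gru',
    input_op_fn=input_op_fn,
    bidirectional=True,   # renamed from `bidirection` in this commit
    steps=100)
```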

skflow/models.py (111 additions, 16 deletions)
@@ -16,7 +16,6 @@
 from __future__ import division, print_function, absolute_import
 
 import tensorflow as tf
-from tensorflow.models.rnn import rnn, rnn_cell
 
 from skflow.ops import mean_squared_error_regressor, softmax_classifier, dnn
 
@@ -93,9 +92,102 @@ def dnn_estimator(X, y):
         return target_predictor_fn(layers, y)
     return dnn_estimator
 
+## This will be in Tensorflow 0.7.
+## TODO(ilblackdragon): Clean this up when it's released
+
+
+def _reverse_seq(input_seq, lengths):
+    """Reverse a list of Tensors up to specified lengths.
+    Args:
+        input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
+        lengths: A tensor of dimension batch_size, containing lengths for each
+                 sequence in the batch. If "None" is specified, simply reverses
+                 the list.
+    Returns:
+        time-reversed sequence
+    """
+    if lengths is None:
+        return list(reversed(input_seq))
+
+    for input_ in input_seq:
+        input_.set_shape(input_.get_shape().with_rank(2))
+
+    # Join into (time, batch_size, depth)
+    s_joined = tf.pack(input_seq)
+
+    # Reverse along dimension 0
+    s_reversed = tf.reverse_sequence(s_joined, lengths, 0, 1)
+    # Split again into list
+    result = tf.unpack(s_reversed)
+    return result
+
+
+def bidirectional_rnn(cell_fw, cell_bw, inputs,
+                      initial_state_fw=None, initial_state_bw=None,
+                      dtype=None, sequence_length=None, scope=None):
+    """Creates a bidirectional recurrent neural network.
+    Similar to the unidirectional case above (rnn) but takes input and builds
+    independent forward and backward RNNs with the final forward and backward
+    outputs depth-concatenated, such that the output will have the format
+    [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of
+    forward and backward cell must match. The initial state for both directions
+    is zero by default (but can be set optionally) and no intermediate states are
+    ever returned -- the network is fully unrolled for the given (passed in)
+    length(s) of the sequence(s) or completely unrolled if length(s) is not given.
+    Args:
+        cell_fw: An instance of RNNCell, to be used for forward direction.
+        cell_bw: An instance of RNNCell, to be used for backward direction.
+        inputs: A length T list of inputs, each a tensor of shape
+                [batch_size, cell.input_size].
+        initial_state_fw: (optional) An initial state for the forward RNN.
+                          This must be a tensor of appropriate type and shape
+                          [batch_size x cell.state_size].
+        initial_state_bw: (optional) Same as for initial_state_fw.
+        dtype: (optional) The data type for the initial state. Required if either
+               of the initial states are not provided.
+        sequence_length: (optional) An int64 vector (tensor) of size [batch_size],
+                         containing the actual lengths for each of the sequences.
+        scope: VariableScope for the created subgraph; defaults to "BiRNN"
+    Returns:
+        A set of output `Tensors` where:
+            outputs is a length T list of outputs (one for each input), which
+            are depth-concatenated forward and backward outputs
+    Raises:
+        TypeError: If "cell_fw" or "cell_bw" is not an instance of RNNCell.
+        ValueError: If inputs is None or an empty list.
+    """
+
+    if not isinstance(cell_fw, tf.nn.rnn_cell.RNNCell):
+        raise TypeError("cell_fw must be an instance of RNNCell")
+    if not isinstance(cell_bw, tf.nn.rnn_cell.RNNCell):
+        raise TypeError("cell_bw must be an instance of RNNCell")
+    if not isinstance(inputs, list):
+        raise TypeError("inputs must be a list")
+    if not inputs:
+        raise ValueError("inputs must not be empty")
+
+    name = scope or "BiRNN"
+    # Forward direction
+    with tf.variable_scope(name + "_FW"):
+        output_fw, _ = tf.nn.rnn(cell_fw, inputs, initial_state_fw, dtype,
+                                 sequence_length)
+
+    # Backward direction
+    with tf.variable_scope(name + "_BW"):
+        tmp, _ = tf.nn.rnn(cell_bw, _reverse_seq(inputs, sequence_length),
+                           initial_state_bw, dtype, sequence_length)
+    output_bw = _reverse_seq(tmp, sequence_length)
+    # Concat each of the forward/backward outputs
+    outputs = [tf.concat(1, [fw, bw])
+               for fw, bw in zip(output_fw, output_bw)]
+
+    return outputs
+
+# End of Tensorflow 0.7
+
 
 def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn,
-                  bidirection, target_predictor_fn,
+                  bidirectional, target_predictor_fn,
                   sequence_length, initial_state):
     """Returns a function that creates a RNN TensorFlow subgraph with given
     params.
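For intuition, a plain-Python reference (an illustration added here, not code from the commit) of what _reverse_seq computes through tf.reverse_sequence: each batch column is reversed only within its own valid length, and padding positions keep their original values:

```python
def reverse_seq_reference(input_seq, lengths):
    """Mimic _reverse_seq on nested lists; input_seq is T steps x B batch."""
    if lengths is None:
        return list(reversed(input_seq))
    out = [row[:] for row in input_seq]     # copy; padding stays in place
    for b, length in enumerate(lengths):
        for t in range(length):
            # Reverse the first `length` steps of batch column b.
            out[t][b] = input_seq[length - 1 - t][b]
    return out

seq = [[1, 10], [2, 20], [3, 30]]           # T=3 steps, batch of 2
print(reverse_seq_reference(seq, [2, 3]))   # [[2, 30], [1, 20], [3, 10]]
```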
@@ -107,13 +199,14 @@ def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn,
         input_op_fn: Function that will transform the input tensor, such as
                      creating word embeddings, byte list, etc. This takes
                      an argument X for input and returns transformed X.
-        bidirection: Whether this is a bidirectional rnn.
+        bidirectional: boolean, Whether this is a bidirectional rnn.
         target_predictor_fn: Function that will predict target from input
                              features. This can be logistic regression,
                              linear regression or any other model,
                              that takes X, y and returns predictions and loss tensors.
         sequence_length: If sequence_length is provided, dynamic calculation is performed.
                          This saves computational time when unrolling past max sequence length.
+                         Required for bidirectional RNNs.
         initial_state: An initial state for the RNN. This must be a tensor of appropriate type
                        and shape [batch_size x cell.state_size].
 
@@ -124,26 +217,28 @@ def rnn_estimator(X, y):
         """RNN estimator with target predictor function on top."""
         X = input_op_fn(X)
         if cell_type == 'rnn':
-            cell_fn = rnn_cell.BasicRNNCell
+            cell_fn = tf.nn.rnn_cell.BasicRNNCell
         elif cell_type == 'gru':
-            cell_fn = rnn_cell.GRUCell
+            cell_fn = tf.nn.rnn_cell.GRUCell
         elif cell_type == 'lstm':
-            cell_fn = rnn_cell.BasicLSTMCell
+            cell_fn = tf.nn.rnn_cell.BasicLSTMCell
         else:
             raise ValueError("cell_type {} is not supported. ".format(cell_type))
-        if bidirection:
+        if bidirectional:
             # forward direction cell
-            rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
+            rnn_fw_cell = tf.nn.rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
             # backward direction cell
-            rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
+            rnn_bw_cell = tf.nn.rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
             # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
-            encoding = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell,
-                                             sequence_length=sequence_length,
-                                             initial_state=initial_state)
+            encoding = bidirectional_rnn(rnn_fw_cell, rnn_bw_cell, X,
+                                         dtype=tf.float32,
+                                         sequence_length=sequence_length,
+                                         initial_state_fw=initial_state,
+                                         initial_state_bw=initial_state)
         else:
-            cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
-            _, encoding = rnn.rnn(cell, X, dtype=tf.float32,
-                                  sequence_length=sequence_length,
-                                  initial_state=initial_state)
+            cell = tf.nn.rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
+            _, encoding = tf.nn.rnn(cell, X, dtype=tf.float32,
+                                    sequence_length=sequence_length,
+                                    initial_state=initial_state)
         return target_predictor_fn(encoding[-1], y)
     return rnn_estimator
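Finally, a hedged sketch of the backported bidirectional_rnn on its own, using the TF 0.6-era API this file targets (tf.placeholder, tf.nn.rnn_cell); all shapes are illustrative. Because each timestep's forward and backward outputs are depth-concatenated by tf.concat(1, [fw, bw]), encoding[-1] hands target_predictor_fn a [batch_size, 2 * rnn_size] tensor:

```python
import tensorflow as tf

T, BATCH, DEPTH, RNN_SIZE = 3, 4, 8, 16    # toy dimensions
inputs = [tf.placeholder(tf.float32, [BATCH, DEPTH]) for _ in range(T)]

cell_fw = tf.nn.rnn_cell.GRUCell(RNN_SIZE)
cell_bw = tf.nn.rnn_cell.GRUCell(RNN_SIZE)
outputs = bidirectional_rnn(cell_fw, cell_bw, inputs, dtype=tf.float32)

print(len(outputs))               # 3: one output per timestep
print(outputs[-1].get_shape())    # (4, 32): BATCH x (2 * RNN_SIZE)
```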
