1616from __future__ import division , print_function , absolute_import
1717
1818import tensorflow as tf
19- from tensorflow .models .rnn import rnn , rnn_cell
2019
2120from skflow .ops import mean_squared_error_regressor , softmax_classifier , dnn
2221
@@ -93,9 +92,102 @@ def dnn_estimator(X, y):
9392 return target_predictor_fn (layers , y )
9493 return dnn_estimator
9594
## This will be in TensorFlow 0.7.
## TODO(ilblackdragon): Clean this up when it's released.
97+
98+
99+ def _reverse_seq (input_seq , lengths ):
100+ """Reverse a list of Tensors up to specified lengths.
101+ Args:
102+ input_seq: Sequence of seq_len tensors of dimension (batch_size, depth)
103+ lengths: A tensor of dimension batch_size, containing lengths for each
104+ sequence in the batch. If "None" is specified, simply reverses
105+ the list.
106+ Returns:
107+ time-reversed sequence
108+ """
109+ if lengths is None :
110+ return list (reversed (input_seq ))
111+
112+ for input_ in input_seq :
113+ input_ .set_shape (input_ .get_shape ().with_rank (2 ))
114+
115+ # Join into (time, batch_size, depth)
116+ s_joined = tf .pack (input_seq )
117+
118+ # Reverse along dimension 0
119+ s_reversed = tf .reverse_sequence (s_joined , lengths , 0 , 1 )
120+ # Split again into list
121+ result = tf .unpack (s_reversed )
122+ return result
123+
124+
def bidirectional_rnn(cell_fw, cell_bw, inputs,
                      initial_state_fw=None, initial_state_bw=None,
                      dtype=None, sequence_length=None, scope=None):
    """Creates a bidirectional recurrent neural network.

    Builds independent forward and backward RNNs over `inputs` and
    depth-concatenates their per-timestep outputs, so the result has the
    format [time][batch][cell_fw.output_size + cell_bw.output_size]. The
    input_size of the forward and backward cell must match. Initial states
    default to zero (but may be provided), no intermediate states are ever
    returned, and the network is fully unrolled for the given (passed-in)
    length(s) of the sequence(s), or completely unrolled if no length is
    given.

    Args:
        cell_fw: An instance of RNNCell, to be used for forward direction.
        cell_bw: An instance of RNNCell, to be used for backward direction.
        inputs: A length T list of inputs, each a tensor of shape
            [batch_size, cell.input_size].
        initial_state_fw: (optional) An initial state for the forward RNN.
            This must be a tensor of appropriate type and shape
            [batch_size x cell.state_size].
        initial_state_bw: (optional) Same as for initial_state_fw.
        dtype: (optional) The data type for the initial state. Required if
            either of the initial states are not provided.
        sequence_length: (optional) An int64 vector (tensor) of size
            [batch_size], containing the actual lengths for each of the
            sequences.
        scope: VariableScope for the created subgraph; defaults to "BiRNN".

    Returns:
        A length T list of output `Tensors` (one per input step), each the
        depth-concatenation of the forward and backward outputs.

    Raises:
        TypeError: If "cell_fw" or "cell_bw" is not an instance of RNNCell.
        ValueError: If inputs is None or an empty list.
    """
    # Validate both cells in one loop; the message matches the per-arg form.
    for cell, arg_name in ((cell_fw, "cell_fw"), (cell_bw, "cell_bw")):
        if not isinstance(cell, tf.nn.rnn_cell.RNNCell):
            raise TypeError("%s must be an instance of RNNCell" % arg_name)
    if not isinstance(inputs, list):
        raise TypeError("inputs must be a list")
    if not inputs:
        raise ValueError("inputs must not be empty")

    scope_name = scope or "BiRNN"

    # Forward direction: unroll over the sequence as given.
    with tf.variable_scope(scope_name + "_FW"):
        output_fw, _ = tf.nn.rnn(cell_fw, inputs, initial_state_fw, dtype,
                                 sequence_length)

    # Backward direction: run over the time-reversed inputs, then flip the
    # outputs back so each step lines up with the forward direction.
    with tf.variable_scope(scope_name + "_BW"):
        reversed_out, _ = tf.nn.rnn(cell_bw,
                                    _reverse_seq(inputs, sequence_length),
                                    initial_state_bw, dtype,
                                    sequence_length)
        output_bw = _reverse_seq(reversed_out, sequence_length)

    # Depth-concatenate the forward/backward outputs at every timestep.
    return [tf.concat(1, [fw, bw]) for fw, bw in zip(output_fw, output_bw)]
185+
# End of TensorFlow 0.7 backport.
187+
96188
97189def get_rnn_model (rnn_size , cell_type , num_layers , input_op_fn ,
98- bidirection , target_predictor_fn ,
190+ bidirectional , target_predictor_fn ,
99191 sequence_length , initial_state ):
100192 """Returns a function that creates a RNN TensorFlow subgraph with given
101193 params.
@@ -107,13 +199,14 @@ def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn,
107199 input_op_fn: Function that will transform the input tensor, such as
108200 creating word embeddings, byte list, etc. This takes
109201 an argument X for input and returns transformed X.
110- bidirection: Whether this is a bidirectional rnn.
202+ bidirectional: boolean, Whether this is a bidirectional rnn.
111203 target_predictor_fn: Function that will predict target from input
112204 features. This can be logistic regression,
113205 linear regression or any other model,
114206 that takes X, y and returns predictions and loss tensors.
115207 sequence_length: If sequence_length is provided, dynamic calculation is performed.
116208 This saves computational time when unrolling past max sequence length.
209+ Required for bidirectional RNNs.
117210 initial_state: An initial state for the RNN. This must be a tensor of appropriate type
118211 and shape [batch_size x cell.state_size].
119212
@@ -124,26 +217,28 @@ def rnn_estimator(X, y):
124217 """RNN estimator with target predictor function on top."""
125218 X = input_op_fn (X )
126219 if cell_type == 'rnn' :
127- cell_fn = rnn_cell .BasicRNNCell
220+ cell_fn = tf . nn . rnn_cell .BasicRNNCell
128221 elif cell_type == 'gru' :
129- cell_fn = rnn_cell .GRUCell
222+ cell_fn = tf . nn . rnn_cell .GRUCell
130223 elif cell_type == 'lstm' :
131- cell_fn = rnn_cell .BasicLSTMCell
224+ cell_fn = tf . nn . rnn_cell .BasicLSTMCell
132225 else :
133226 raise ValueError ("cell_type {} is not supported. " .format (cell_type ))
134- if bidirection :
227+ if bidirectional :
135228 # forward direction cell
136- rnn_fw_cell = rnn_cell .MultiRNNCell ([cell_fn (rnn_size )] * num_layers )
229+ rnn_fw_cell = tf . nn . rnn_cell .MultiRNNCell ([cell_fn (rnn_size )] * num_layers )
137230 # backward direction cell
138- rnn_bw_cell = rnn_cell .MultiRNNCell ([cell_fn (rnn_size )] * num_layers )
231+ rnn_bw_cell = tf . nn . rnn_cell .MultiRNNCell ([cell_fn (rnn_size )] * num_layers )
139232 # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
140- encoding = rnn .bidirectional_rnn (rnn_fw_cell , rnn_bw_cell ,
141- sequence_length = sequence_length ,
142- initial_state = initial_state )
233+ encoding = bidirectional_rnn (rnn_fw_cell , rnn_bw_cell , X ,
234+ dtype = tf .float32 ,
235+ sequence_length = sequence_length ,
236+ initial_state_fw = initial_state ,
237+ initial_state_bw = initial_state )
143238 else :
144- cell = rnn_cell .MultiRNNCell ([cell_fn (rnn_size )] * num_layers )
145- _ , encoding = rnn .rnn (cell , X , dtype = tf .float32 ,
146- sequence_length = sequence_length ,
147- initial_state = initial_state )
239+ cell = tf . nn . rnn_cell .MultiRNNCell ([cell_fn (rnn_size )] * num_layers )
240+ _ , encoding = tf . nn .rnn (cell , X , dtype = tf .float32 ,
241+ sequence_length = sequence_length ,
242+ initial_state = initial_state )
148243 return target_predictor_fn (encoding [- 1 ], y )
149244 return rnn_estimator
0 commit comments