import cPickle as pkl
import time

import numpy
import theano
from theano import config
import theano.tensor as T
from theano.tensor.nnet import categorical_crossentropy

from fuel.datasets import TextFile
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch, Padding


# These files can be downloaded from
# http://www-etud.iro.umontreal.ca/~brakelp/train.txt.gz
# http://www-etud.iro.umontreal.ca/~brakelp/dictionary.pkl
# don't forget to change the paths and gunzip train.txt.gz
TRAIN_FILE = '/u/brakelp/temp/traindata.txt'
VAL_FILE = '/u/brakelp/temp/valdata.txt'
DICT_FILE = '/u/brakelp/temp/dictionary.pkl'

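# Sum of the per-character cross-entropy over time and batch, with a
# binary mask that removes the contribution of the zero-padded positions
# in shorter sequences.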
def sequence_categorical_crossentropy(prediction, targets, mask):
    prediction_flat = prediction.reshape(((prediction.shape[0] *
                                           prediction.shape[1]),
                                          prediction.shape[2]), ndim=2)
    targets_flat = targets.flatten()
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(prediction_flat, targets_flat)
    return T.sum(ce * mask_flat)

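# Draw an initial weight matrix from a zero-mean Gaussian with a small
# standard deviation.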
def gauss_weight(rng, ndim_in, ndim_out=None, sd=.005):
    if ndim_out is None:
        ndim_out = ndim_in
    W = rng.randn(ndim_in, ndim_out) * sd
    return numpy.asarray(W, dtype=config.floatX)


class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input, n_in, n_out):
        """Initialize the parameters of the logistic regression.

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """

        # initialize the weights W as a zero matrix of shape (n_in, n_out)
        self.W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                 dtype=theano.config.floatX),
                               name='W', borrow=True)
        # initialize the biases b as a vector of n_out zeros
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b', borrow=True)

        # compute the class-membership probabilities in symbolic form: a
        # softmax over the last axis (the class dimension) of the
        # (time, batch, class) energy tensor, with the maximum subtracted
        # first for numerical stability
        energy = T.dot(input, self.W) + self.b
        energy_exp = T.exp(energy - T.max(energy, 2)[:, :, None])
        pmf = energy_exp / energy_exp.sum(2)[:, :, None]
        self.p_y_given_x = pmf

        # compute the prediction as the class whose probability is
        # maximal, along the last (class) axis of p_y_given_x
        self.y_pred = T.argmax(self.p_y_given_x, axis=-1)

        # parameters of the model
        self.params = [self.W, self.b]


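# Select rows of w by integer index.  For one-hot coded inputs this is
# equivalent to multiplying the one-hot matrix with w, but much cheaper.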
def index_dot(indices, w):
    return w[indices.flatten()]


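# An LSTM layer over a (time, batch) matrix of integer character indices.
# `mask` has the same shape and is 1 for real symbols, 0 for padding.
# theano.scan applies _step once per time step, threading the hidden
# state h and the cell state c through the recurrence.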
class LstmLayer:

    def __init__(self, rng, input, mask, n_in, n_h):

        # input-to-gate weights, one matrix per gate and one for the
        # candidate cell state
        self.W_i = theano.shared(gauss_weight(rng, n_in, n_h), 'W_i',
                                 borrow=True)
        self.W_f = theano.shared(gauss_weight(rng, n_in, n_h), 'W_f',
                                 borrow=True)
        self.W_c = theano.shared(gauss_weight(rng, n_in, n_h), 'W_c',
                                 borrow=True)
        self.W_o = theano.shared(gauss_weight(rng, n_in, n_h), 'W_o',
                                 borrow=True)

        # hidden-to-gate weights
        self.U_i = theano.shared(gauss_weight(rng, n_h), 'U_i', borrow=True)
        self.U_f = theano.shared(gauss_weight(rng, n_h), 'U_f', borrow=True)
        self.U_c = theano.shared(gauss_weight(rng, n_h), 'U_c', borrow=True)
        self.U_o = theano.shared(gauss_weight(rng, n_h), 'U_o', borrow=True)

        # gate biases
        self.b_i = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_i', borrow=True)
        self.b_f = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_f', borrow=True)
        self.b_c = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_c', borrow=True)
        self.b_o = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_o', borrow=True)

        self.params = [self.W_i, self.W_f, self.W_c, self.W_o,
                       self.U_i, self.U_f, self.U_c, self.U_o,
                       self.b_i, self.b_f, self.b_c, self.b_o]

        # initial hidden and cell states: one zero vector per batch element
        outputs_info = [T.zeros((input.shape[1], n_h)),
                        T.zeros((input.shape[1], n_h))]

        rval, updates = theano.scan(self._step,
                                    sequences=[mask, input],
                                    outputs_info=outputs_info)

        # self.output is the sequence of hidden states, in the format
        # (seq_len, batch_size, n_h)
        self.output = rval[0]

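    # One step of the LSTM recurrence.  Because x_ holds integer indices,
    # the index_dot lookups play the role of W * one_hot(x_).  Where the
    # mask m_ is 0 (padding), the previous h_ and c_ are carried over
    # unchanged, so padding never affects the state.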
    def _step(self, m_, x_, h_, c_):

        i_preact = (index_dot(x_, self.W_i) +
                    T.dot(h_, self.U_i) + self.b_i)
        i = T.nnet.sigmoid(i_preact)

        f_preact = (index_dot(x_, self.W_f) +
                    T.dot(h_, self.U_f) + self.b_f)
        f = T.nnet.sigmoid(f_preact)

        o_preact = (index_dot(x_, self.W_o) +
                    T.dot(h_, self.U_o) + self.b_o)
        o = T.nnet.sigmoid(o_preact)

        c_preact = (index_dot(x_, self.W_c) +
                    T.dot(h_, self.U_c) + self.b_c)
        c = T.tanh(c_preact)

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * T.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c


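# Train a character-level language model: at each time step the network
# is trained to predict the next character of the input text.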
def train_model(batch_size=100, n_h=50, n_epochs=40):

    # Load the datasets with Fuel; add an unknown-character token
    dictionary = pkl.load(open(DICT_FILE, 'rb'))
    dictionary['~'] = len(dictionary)
    # the vocabulary size determines the input and output layer sizes below
    vocab_size = len(dictionary)
    reverse_mapping = dict((j, i) for i, j in dictionary.items())

    print("Loading the data")
    train = TextFile(files=[TRAIN_FILE],
                     dictionary=dictionary,
                     unk_token='~',
                     level='character',
                     preprocess=str.lower,
                     bos_token=None,
                     eos_token=None)

    train_stream = DataStream.default_stream(train)

    # organize the data in batches and pad shorter sequences with zeros
    train_stream = Batch(train_stream,
                         iteration_scheme=ConstantScheme(batch_size))
    train_stream = Padding(train_stream)

    # the same for the validation text
    val = TextFile(files=[VAL_FILE],
                   dictionary=dictionary,
                   unk_token='~',
                   level='character',
                   preprocess=str.lower,
                   bos_token=None,
                   eos_token=None)

    val_stream = DataStream.default_stream(val)

    # organize the data in batches and pad shorter sequences with zeros
    val_stream = Batch(val_stream,
                       iteration_scheme=ConstantScheme(batch_size))
    val_stream = Padding(val_stream)
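
    # Padding adds a mask source to the stream, so each batch arrives as a
    # pair of (batch, time) arrays: the padded character indices and a 0/1
    # mask marking the real symbols.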

    print('Building model')

    # Set the random number generator's seed for consistency
    rng = numpy.random.RandomState(12345)

    # x holds a (time, batch) matrix of character indices; mask is the
    # corresponding 0/1 padding mask
    x = T.lmatrix('x')
    mask = T.matrix('mask')

    # Construct the LSTM layer
    recurrent_layer = LstmLayer(rng=rng, input=x, mask=mask,
                                n_in=vocab_size, n_h=n_h)

    # The softmax layer maps each hidden state to a distribution over the
    # vocabulary
    logreg_layer = LogisticRegression(input=recurrent_layer.output[:-1],
                                      n_in=n_h, n_out=vocab_size)

    # Next-character objective: the prediction made at time t (from
    # output[:-1]) is scored against the character at time t + 1 (x[1:])
    cost = sequence_categorical_crossentropy(logreg_layer.p_y_given_x,
                                             x[1:],
                                             mask[1:]) / batch_size

    # create a list of all model parameters to be fit by gradient descent
    params = logreg_layer.params + recurrent_layer.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # update_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter.  We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    learning_rate = 0.1
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    update_model = theano.function([x, mask], cost, updates=updates)

    evaluate_model = theano.function([x, mask], cost)

    # Define and compile a function for generating a sequence step by step.
    # The layer's _step is reused with a mask of ones, so the state is
    # always updated; x_t is a single character index and h_p, c_p are the
    # previous hidden and cell state vectors.
    x_t = T.iscalar()
    h_p = T.vector()
    c_p = T.vector()
    h_t, c_t = recurrent_layer._step(T.ones(1), x_t, h_p, c_p)
    energy = T.dot(h_t, logreg_layer.W) + logreg_layer.b

    # normalized distribution over the next character
    energy_exp = T.exp(energy - T.max(energy, 1)[:, None])
    output = energy_exp / energy_exp.sum(1)[:, None]
    single_step = theano.function([x_t, h_p, c_p], [output, h_t, c_t])
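
    # An illustrative (hypothetical) call: given a character index and zero
    # initial states, single_step returns the distribution over the next
    # character together with the updated states, e.g.
    #   p, h, c = single_step(dictionary['a'],
    #                         numpy.zeros(n_h, dtype=config.floatX),
    #                         numpy.zeros(n_h, dtype=config.floatX))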

    start_time = time.clock()

    iteration = 0

    for epoch in range(n_epochs):
        print 'epoch:', epoch

        for x_, mask_ in train_stream.get_epoch_iterator():
            iteration += 1

            # Fuel delivers (batch, time) arrays while the model expects
            # (time, batch), hence the transposes
            cross_entropy = update_model(x_.T, mask_.T)

            # Generate some text after every 40 minibatches
            if iteration % 40 == 0:
                try:
                    # start from a uniform distribution and zero states
                    prediction = numpy.ones(vocab_size,
                                            dtype=config.floatX) / vocab_size
                    h_p = numpy.zeros((n_h,), dtype=config.floatX)
                    c_p = numpy.zeros((n_h,), dtype=config.floatX)
                    initial = 'the meaning of life is '
                    sentence = initial
                    # condition the state on the seed text
                    for char in initial:
                        x_t = dictionary[char]
                        prediction, h_p, c_p = single_step(x_t,
                                                           h_p.flatten(),
                                                           c_p.flatten())
                    # sample 450 characters, feeding each one back in
                    sample = numpy.random.multinomial(1,
                                                      prediction.flatten())
                    for i in range(450):
                        x_t = numpy.argmax(sample)
                        prediction, h_p, c_p = single_step(x_t,
                                                           h_p.flatten(),
                                                           c_p.flatten())
                        sentence += reverse_mapping[x_t]
                        sample = numpy.random.multinomial(
                            1, prediction.flatten())
                    print 'LSTM: "' + sentence + '"'
                except ValueError:
                    # multinomial can raise ValueError when the float32
                    # probabilities do not sum exactly to one
                    print 'Something went wrong during sentence generation.'

            # Evaluate on the validation set after every 40 minibatches
            if iteration % 40 == 0:
                print 'epoch:', epoch, ' minibatch:', iteration
                val_scores = []
                for x_val, mask_val in val_stream.get_epoch_iterator():
                    val_scores.append(evaluate_model(x_val.T, mask_val.T))
                print 'Average validation CE per sentence:', \
                    numpy.mean(val_scores)

    end_time = time.clock()
    print('Optimization complete.')
    print('The code ran for %.2fm' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    train_model()