
Commit 73baf7a

Add Theano recurrent neural networks notebook.
1 parent 98375d2 commit 73baf7a

File tree

9 files changed: +1533 additions, −0 deletions

README.md

Lines changed: 1 addition & 0 deletions

@@ -96,6 +96,7 @@ IPython Notebook(s) demonstrating deep learning functionality.

  | [theano-intro](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/deep-learning/theano-tutorial/intro_theano/intro_theano.ipynb) | Intro to Theano, which allows you to define, optimize, and evaluate mathematical expressions involving multi-dimensional arrays efficiently. It can use GPUs and perform efficient symbolic differentiation. |
  | [theano-scan](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/deep-learning/theano-tutorial/scan_tutorial/scan_tutorial.ipynb) | Learn scans, a mechanism to perform loops in a Theano graph. |
  | [theano-logistic](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/deep-learning/theano-tutorial/intro_theano/logistic_regression.ipynb) | Implement logistic regression in Theano. |
+ | [theano-rnn](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/deep-learning/theano-tutorial/rnn_tutorial/simple_rnn.ipynb) | Implement recurrent neural networks in Theano. |
  | [deep-dream](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/deep-learning/deep-dream/dream.ipynb) | Caffe-based computer vision program which uses a convolutional neural network to find and enhance patterns in images. |

  <br/>
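As context for the new theano-rnn entry: theano.scan is the looping construct that the theano-scan and theano-rnn notebooks build on. Below is a minimal sketch of a vanilla RNN expressed with scan. It is not taken from this commit; the sizes and names (n_in, n_h, W_x, W_h, step) are illustrative assumptions.

    import numpy
    import theano
    import theano.tensor as T
    from theano import config

    n_in, n_h = 3, 5  # illustrative input and hidden sizes

    # one sequence of input vectors, shape (time_steps, n_in)
    x = T.matrix('x')

    W_x = theano.shared(numpy.asarray(0.01 * numpy.random.randn(n_in, n_h),
                                      dtype=config.floatX), name='W_x')
    W_h = theano.shared(numpy.asarray(0.01 * numpy.random.randn(n_h, n_h),
                                      dtype=config.floatX), name='W_h')
    b = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), name='b')

    def step(x_t, h_prev):
        # one step of the recurrence: mix the current input with the
        # previous hidden state
        return T.tanh(T.dot(x_t, W_x) + T.dot(h_prev, W_h) + b)

    # scan applies `step` along the first axis of x, threading the hidden
    # state through outputs_info; this is the loop in the Theano graph
    h, _ = theano.scan(step, sequences=x, outputs_info=T.zeros((n_h,)))

    last_hidden = theano.function([x], h[-1])
    print(last_hidden(numpy.ones((7, n_in), dtype=config.floatX)).shape)  # (5,)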
Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@

all: instruction.pdf rnn_lstm.pdf

instruction.pdf: slides_source/instruction.tex
	cd slides_source; pdflatex --shell-escape instruction.tex
	cd slides_source; pdflatex --shell-escape instruction.tex
	cd slides_source; pdflatex --shell-escape instruction.tex
	mv slides_source/instruction.pdf .

rnn_lstm.pdf: slides_source/rnn_lstm.tex
	cd slides_source; pdflatex --shell-escape rnn_lstm.tex
	cd slides_source; pdflatex --shell-escape rnn_lstm.tex
	cd slides_source; pdflatex --shell-escape rnn_lstm.tex
	mv slides_source/rnn_lstm.pdf .
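Note: each deck is compiled by running pdflatex three times, the usual make idiom for letting LaTeX converge on cross-references and table-of-contents entries across passes. The --shell-escape flag lets the LaTeX run invoke external programs, as packages such as minted require; whether these slides actually need it is an inference from the flag, not from the sources.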
Binary file not shown.

deep-learning/theano-tutorial/rnn_tutorial/lstm_text.ipynb

Lines changed: 508 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 299 additions & 0 deletions
@@ -0,0 +1,299 @@

import cPickle as pkl
import time

import numpy
import theano
from theano import config
import theano.tensor as T
from theano.tensor.nnet import categorical_crossentropy

from fuel.datasets import TextFile
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch, Padding


# These files can be downloaded from
# http://www-etud.iro.umontreal.ca/~brakelp/train.txt.gz
# http://www-etud.iro.umontreal.ca/~brakelp/dictionary.pkl
# don't forget to change the paths and gunzip train.txt.gz
TRAIN_FILE = '/u/brakelp/temp/traindata.txt'
VAL_FILE = '/u/brakelp/temp/valdata.txt'
DICT_FILE = '/u/brakelp/temp/dictionary.pkl'


def sequence_categorical_crossentropy(prediction, targets, mask):
    prediction_flat = prediction.reshape(((prediction.shape[0] *
                                           prediction.shape[1]),
                                          prediction.shape[2]), ndim=2)
    targets_flat = targets.flatten()
    mask_flat = mask.flatten()
    ce = categorical_crossentropy(prediction_flat, targets_flat)
    return T.sum(ce * mask_flat)


def gauss_weight(ndim_in, ndim_out=None, sd=.005):
    if ndim_out is None:
        ndim_out = ndim_in
    W = numpy.random.randn(ndim_in, ndim_out) * sd
    return numpy.asarray(W, dtype=config.floatX)


class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """

        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                 dtype=theano.config.floatX),
                               name='W', borrow=True)
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b', borrow=True)

        # compute vector of class-membership probabilities in symbolic form
        energy = T.dot(input, self.W) + self.b
        energy_exp = T.exp(energy - T.max(energy, 2)[:, :, None])
        pmf = energy_exp / energy_exp.sum(2)[:, :, None]
        self.p_y_given_x = pmf

        # compute prediction as class whose probability is maximal in
        # symbolic form (class scores live on the last axis)
        self.y_pred = T.argmax(self.p_y_given_x, axis=-1)

        # parameters of the model
        self.params = [self.W, self.b]


def index_dot(indices, w):
    return w[indices.flatten()]


class LstmLayer:

    def __init__(self, rng, input, mask, n_in, n_h):

        # Init params
        self.W_i = theano.shared(gauss_weight(n_in, n_h), 'W_i', borrow=True)
        self.W_f = theano.shared(gauss_weight(n_in, n_h), 'W_f', borrow=True)
        self.W_c = theano.shared(gauss_weight(n_in, n_h), 'W_c', borrow=True)
        self.W_o = theano.shared(gauss_weight(n_in, n_h), 'W_o', borrow=True)

        self.U_i = theano.shared(gauss_weight(n_h), 'U_i', borrow=True)
        self.U_f = theano.shared(gauss_weight(n_h), 'U_f', borrow=True)
        self.U_c = theano.shared(gauss_weight(n_h), 'U_c', borrow=True)
        self.U_o = theano.shared(gauss_weight(n_h), 'U_o', borrow=True)

        self.b_i = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_i', borrow=True)
        self.b_f = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_f', borrow=True)
        self.b_c = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_c', borrow=True)
        self.b_o = theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                 'b_o', borrow=True)

        self.params = [self.W_i, self.W_f, self.W_c, self.W_o,
                       self.U_i, self.U_f, self.U_c, self.U_o,
                       self.b_i, self.b_f, self.b_c, self.b_o]

        outputs_info = [T.zeros((input.shape[1], n_h)),
                        T.zeros((input.shape[1], n_h))]

        rval, updates = theano.scan(self._step,
                                    sequences=[mask, input],
                                    outputs_info=outputs_info)

        # self.output is the sequence of hidden states, in the format
        # (seq_len, batch_size, n_h)
        self.output = rval[0]

    def _step(self, m_, x_, h_, c_):

        i_preact = (index_dot(x_, self.W_i) +
                    T.dot(h_, self.U_i) + self.b_i)
        i = T.nnet.sigmoid(i_preact)

        f_preact = (index_dot(x_, self.W_f) +
                    T.dot(h_, self.U_f) + self.b_f)
        f = T.nnet.sigmoid(f_preact)

        o_preact = (index_dot(x_, self.W_o) +
                    T.dot(h_, self.U_o) + self.b_o)
        o = T.nnet.sigmoid(o_preact)

        c_preact = (index_dot(x_, self.W_c) +
                    T.dot(h_, self.U_c) + self.b_c)
        c = T.tanh(c_preact)

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * T.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c


def train_model(batch_size=100, n_h=50, n_epochs=40):

    # Load the datasets with Fuel
    dictionary = pkl.load(open(DICT_FILE, 'r'))
    dictionary['~'] = len(dictionary)
    reverse_mapping = dict((j, i) for i, j in dictionary.items())

    print("Loading the data")
    train = TextFile(files=[TRAIN_FILE],
                     dictionary=dictionary,
                     unk_token='~',
                     level='character',
                     preprocess=str.lower,
                     bos_token=None,
                     eos_token=None)

    train_stream = DataStream.default_stream(train)

    # organize data in batches and pad shorter sequences with zeros
    train_stream = Batch(train_stream,
                         iteration_scheme=ConstantScheme(batch_size))
    train_stream = Padding(train_stream)

    # ditto for the validation text
    val = TextFile(files=[VAL_FILE],
                   dictionary=dictionary,
                   unk_token='~',
                   level='character',
                   preprocess=str.lower,
                   bos_token=None,
                   eos_token=None)

    val_stream = DataStream.default_stream(val)

    # organize data in batches and pad shorter sequences with zeros
    val_stream = Batch(val_stream,
                       iteration_scheme=ConstantScheme(batch_size))
    val_stream = Padding(val_stream)

    print('Building model')

    # Set the random number generator's seeds for consistency
    rng = numpy.random.RandomState(12345)

    x = T.lmatrix('x')
    mask = T.matrix('mask')

    # Construct the LSTM layer
    recurrent_layer = LstmLayer(rng=rng, input=x, mask=mask, n_in=111, n_h=n_h)

    logreg_layer = LogisticRegression(input=recurrent_layer.output[:-1],
                                      n_in=n_h, n_out=111)

    cost = sequence_categorical_crossentropy(logreg_layer.p_y_given_x,
                                             x[1:],
                                             mask[1:]) / batch_size

    # create a list of all model parameters to be fit by gradient descent
    params = logreg_layer.params + recurrent_layer.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # update_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    learning_rate = 0.1
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    update_model = theano.function([x, mask], cost, updates=updates)

    evaluate_model = theano.function([x, mask], cost)

    # Define and compile a function for generating a sequence step by step.
    x_t = T.iscalar()
    h_p = T.vector()
    c_p = T.vector()
    h_t, c_t = recurrent_layer._step(T.ones(1), x_t, h_p, c_p)
    energy = T.dot(h_t, logreg_layer.W) + logreg_layer.b

    energy_exp = T.exp(energy - T.max(energy, 1)[:, None])

    output = energy_exp / energy_exp.sum(1)[:, None]
    single_step = theano.function([x_t, h_p, c_p], [output, h_t, c_t])

    start_time = time.clock()

    iteration = 0

    for epoch in range(n_epochs):
        print 'epoch:', epoch

        for x_, mask_ in train_stream.get_epoch_iterator():
            iteration += 1

            cross_entropy = update_model(x_.T, mask_.T)

            # Generate some text after every 40 minibatches
            if iteration % 40 == 0:
                try:
                    prediction = numpy.ones(111, dtype=config.floatX) / 111.0
                    h_p = numpy.zeros((n_h,), dtype=config.floatX)
                    c_p = numpy.zeros((n_h,), dtype=config.floatX)
                    initial = 'the meaning of life is '
                    sentence = initial
                    for char in initial:
                        x_t = dictionary[char]
                        prediction, h_p, c_p = single_step(x_t, h_p.flatten(),
                                                           c_p.flatten())
                    sample = numpy.random.multinomial(1, prediction.flatten())
                    for i in range(450):
                        x_t = numpy.argmax(sample)
                        prediction, h_p, c_p = single_step(x_t, h_p.flatten(),
                                                           c_p.flatten())
                        sentence += reverse_mapping[x_t]
                        sample = numpy.random.multinomial(1, prediction.flatten())
                    print 'LSTM: "' + sentence + '"'
                except ValueError:
                    print 'Something went wrong during sentence generation.'

            if iteration % 40 == 0:
                print 'epoch:', epoch, ' minibatch:', iteration
                val_scores = []
                for x_val, mask_val in val_stream.get_epoch_iterator():
                    val_scores.append(evaluate_model(x_val.T, mask_val.T))
                print 'Average validation CE per sentence:', numpy.mean(val_scores)

    end_time = time.clock()
    print('Optimization complete.')
    print('The code ran for %.2fm' % ((end_time - start_time) / 60.))


if __name__ == '__main__':
    train_model()
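For reference, and not part of the commit itself: LstmLayer._step above is the standard LSTM recurrence. Since index_dot performs a row lookup, W x_t below denotes W[x_t], which is equivalent to multiplying W by a one-hot encoding of the character id, and the mask m_t holds the state fixed on padded positions:

    i_t = \sigma(W_i x_t + U_i h_{t-1} + b_i)
    f_t = \sigma(W_f x_t + U_f h_{t-1} + b_f)
    o_t = \sigma(W_o x_t + U_o h_{t-1} + b_o)
    \tilde{c}_t = \tanh(W_c x_t + U_c h_{t-1} + b_c)
    c_t = m_t \odot (f_t \odot c_{t-1} + i_t \odot \tilde{c}_t) + (1 - m_t) \odot c_{t-1}
    h_t = m_t \odot (o_t \odot \tanh(c_t)) + (1 - m_t) \odot h_{t-1}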
Binary file not shown.
