
Commit e08b4e3

updating tutorial to use nn.rnn in composition

1 parent 3d2bb57

2 files changed: 24 additions & 29 deletions

en-wordlist.txt

Lines changed: 4 additions & 0 deletions
@@ -33,6 +33,7 @@ Captum
 Captum's
 CartPole
 Cayley
+CharRNN
 Chatbots
 Chen
 Colab
@@ -421,12 +422,14 @@ jpg
 json
 judgements
 jupyter
+kernels
 keypoint
 kwargs
 labelled
 latencies
 learnable
 learnings
+lineToTensor
 linearities
 loadFilename
 logits
@@ -460,6 +463,7 @@ namespace
 natively
 ndarrays
 nightlies
+nn
 num
 numericalize
 numpy

intermediate_source/char_rnn_classification_tutorial.py

Lines changed: 20 additions & 29 deletions
@@ -218,51 +218,42 @@ def __getitem__(self, idx):
 # held hidden state and gradients which are now entirely handled by the
 # graph itself. This means you can implement a RNN in a very "pure" way,
 # as regular feed-forward layers.
+#
+# This CharRNN class implements an RNN with three components.
+# First, we use the `nn.RNN implementation <https://pytorch.org/docs/stable/generated/torch.nn.RNN.html>`__,
+# next we define a layer that maps the RNN hidden state to our output, and finally we apply a softmax. Using nn.RNN
+# leads to a significant improvement in performance (e.g. cuDNN-accelerated kernels) versus implementing
+# each layer as an nn.Linear. It also simplifies the implementation in forward().
 #
-# This RNN module implements a "vanilla RNN" an is just 3 linear layers
-# which operate on an input and hidden state, with a ``LogSoftmax`` layer
-# after the output.s
-#
-# forward() loops through each of the characters in the given tensor, computes each
-# layer and then passes the hidden layer onto to the next iteration.

 import torch.nn as nn
 import torch.nn.functional as F

-class RNN(nn.Module):
-    def __init__(self, input_size, hidden_size, output_labels):
-        super(RNN, self).__init__()
+class CharRNN(nn.Module):
+    def __init__(self, input_size, hidden_size, output_size):
+        super(CharRNN, self).__init__()

-        self.hidden_size = hidden_size
-        self.output_labels = output_labels
-
-        self.i2h = nn.Linear(input_size, hidden_size)
-        self.h2h = nn.Linear(hidden_size, hidden_size)
-        self.h2o = nn.Linear(hidden_size, len(output_labels))
+        self.rnn = nn.RNN(input_size, hidden_size)
+        self.h2o = nn.Linear(hidden_size, output_size)
         self.softmax = nn.LogSoftmax(dim=1)

     def forward(self, line_tensor):
-        hidden = torch.zeros(1, rnn.hidden_size)
-        output = torch.zeros(1, len(self.output_labels))
-
-        for i in range(line_tensor.size()[0]):
-            input = line_tensor[i]
-            hidden = F.tanh(self.i2h(input) + self.h2h(hidden))
-            output = self.h2o(hidden)
-            output = self.softmax(output)
+        rnn_out, hidden = self.rnn(line_tensor)
+        output = self.h2o(hidden[0])
+        output = self.softmax(output)

         return output


 ###########################
-#We can then create a RNN with 128 hidden nodes and given our datasets
+# We can then create an RNN with 57 input nodes, 128 hidden nodes, and 18 outputs.

 n_hidden = 128
-rnn = RNN(n_letters, n_hidden, alldata.labels_uniq)
+rnn = CharRNN(n_letters, n_hidden, len(alldata.labels_uniq))
 print(rnn)

 ######################################################################
-# We can then pass our Tensor to the runn to get a predicted output and
+# We can then pass our Tensor to the RNN to get a predicted output and
 # use a helper function, label_from_output, to get a text label for the class.

 def label_from_output(output, output_labels):
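
For reference, a minimal standalone sketch of how the new nn.RNN-based module behaves. It assumes a one-hot line tensor of shape (line_length, 1, n_letters), as produced elsewhere in the tutorial by lineToTensor, and uses the sizes quoted above (57 letters, 18 classes); the dummy input below is purely illustrative.

import torch
import torch.nn as nn

class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        # nn.RNN consumes the whole sequence in one call and returns the
        # per-step outputs plus the final hidden state
        self.rnn = nn.RNN(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, line_tensor):
        rnn_out, hidden = self.rnn(line_tensor)   # hidden: (1, 1, hidden_size)
        output = self.h2o(hidden[0])              # (1, output_size)
        return self.softmax(output)               # log-probabilities per class

n_letters, n_classes = 57, 18                     # sizes quoted in the tutorial text
rnn = CharRNN(n_letters, 128, n_classes)
line_tensor = torch.zeros(6, 1, n_letters)        # a 6-character one-hot "line"
line_tensor[:, 0, 0] = 1                          # hypothetical dummy characters
print(rnn(line_tensor).shape)                     # torch.Size([1, 18])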
@@ -345,7 +336,7 @@ def train(rnn, training_data, n_epoch = 10, n_batch_size = 64, report_every = 50
 # We can now train a dataset with mini batches for a specified number of epochs

 start = time.time()
-all_losses = train(rnn, train_set, n_epoch=10, learning_rate=0.2, report_every=1)
+all_losses = train(rnn, train_set, n_epoch=13, learning_rate=0.2, report_every=1)
 end = time.time()
 print(f"training took {end-start}s")

@@ -429,9 +420,9 @@ def evaluate(rnn, testing_data, classes):
429420
#
430421
# - Get better results with a bigger and/or better shaped network
431422
#
432-
# - Vary the hyperparameters to improve performance (e.g. 250 epochs, batch size, learning rate )
433-
# - Add more linear layers
423+
# - Vary the hyperparameters to improve performance (e.g. change epochs, batch size, learning rate )
434424
# - Try the ``nn.LSTM`` and ``nn.GRU`` layers
425+
# - Change the size of the layers (e.g. fewer or more hidden nodes, additional linear layers)
435426
# - Combine multiple of these RNNs as a higher level network
436427
#
437428
# - Try with a different dataset of line -> label, for example:
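
For the ``nn.LSTM`` / ``nn.GRU`` exercise listed in the hunk above, a minimal hypothetical variant of the class from this commit: nn.LSTM returns a cell state alongside the hidden state, so only the unpacking in forward() changes.

import torch.nn as nn

class CharLSTM(nn.Module):
    # Hypothetical drop-in variant of CharRNN for the exercise above.
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, line_tensor):
        # nn.LSTM returns (outputs, (h_n, c_n)); classify from the final hidden state
        lstm_out, (hidden, cell) = self.lstm(line_tensor)
        return self.softmax(self.h2o(hidden[0]))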
