@@ -218,51 +218,42 @@ def __getitem__(self, idx):
# held hidden state and gradients which are now entirely handled by the
# graph itself. This means you can implement a RNN in a very "pure" way,
# as regular feed-forward layers.
+ #
+ # This CharRNN class implements an RNN with three components.
+ # First, we use the `nn.RNN implementation <https://pytorch.org/docs/stable/generated/torch.nn.RNN.html>`__,
+ # next we define a layer that maps the RNN hidden layers to our output, and finally we apply a softmax. Using nn.RNN
+ # leads to a significant improvement in performance (e.g. cuDNN-accelerated kernels) versus implementing
+ # each layer as a nn.Linear. It also simplifies the implementation in forward().
#
- # This RNN module implements a "vanilla RNN" an is just 3 linear layers
- # which operate on an input and hidden state, with a ``LogSoftmax`` layer
- # after the output.s
- #
- # forward() loops through each of the characters in the given tensor, computes each
- # layer and then passes the hidden layer onto to the next iteration.
import torch.nn as nn
import torch.nn.functional as F
- class RNN(nn.Module):
-     def __init__(self, input_size, hidden_size, output_labels):
-         super(RNN, self).__init__()
+ class CharRNN(nn.Module):
+     def __init__(self, input_size, hidden_size, output_size):
+         super(CharRNN, self).__init__()

-         self.hidden_size = hidden_size
-         self.output_labels = output_labels
-
-         self.i2h = nn.Linear(input_size, hidden_size)
-         self.h2h = nn.Linear(hidden_size, hidden_size)
-         self.h2o = nn.Linear(hidden_size, len(output_labels))
+         self.rnn = nn.RNN(input_size, hidden_size)
+         self.h2o = nn.Linear(hidden_size, output_size)

        self.softmax = nn.LogSoftmax(dim=1)
    def forward(self, line_tensor):
-         hidden = torch.zeros(1, rnn.hidden_size)
-         output = torch.zeros(1, len(self.output_labels))
-
-         for i in range(line_tensor.size()[0]):
-             input = line_tensor[i]
-             hidden = F.tanh(self.i2h(input) + self.h2h(hidden))
-             output = self.h2o(hidden)
-             output = self.softmax(output)
+         rnn_out, hidden = self.rnn(line_tensor)
+         output = self.h2o(hidden[0])
+         output = self.softmax(output)

        return output
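A rough shape sketch (not part of the tutorial code) may help clarify why ``forward()`` indexes ``hidden[0]``: assuming a one-hot line tensor of shape ``(line_length, 1, n_letters)`` as built earlier in the tutorial, ``nn.RNN`` returns a final hidden state of shape ``(num_layers, 1, hidden_size)``, so ``hidden[0]`` is the ``(1, hidden_size)`` vector fed to the output layer.

# Sketch only: shape check for CharRNN.forward(), assuming line tensors of
# shape (line_length, 1, n_letters) as used elsewhere in this tutorial.
import torch

demo = CharRNN(input_size=57, hidden_size=128, output_size=18)
line = torch.zeros(6, 1, 57)                # a 6-character name, batch size 1
rnn_out, hidden = demo.rnn(line)            # rnn_out: (6, 1, 128), hidden: (1, 1, 128)
scores = demo.softmax(demo.h2o(hidden[0]))  # hidden[0]: (1, 128) -> scores: (1, 18)
print(scores.shape)                         # torch.Size([1, 18])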
###########################
- #We can then create a RNN with 128 hidden nodes and given our datasets
+ # We can then create an RNN with 57 input nodes, 128 hidden nodes, and 18 outputs.
n_hidden = 128
- rnn = RNN(n_letters, n_hidden, alldata.labels_uniq)
+ rnn = CharRNN(n_letters, n_hidden, len(alldata.labels_uniq))
print(rnn)
######################################################################
- # We can then pass our Tensor to the runn to get a predicted output and
+ # We can then pass our Tensor to the RNN to get a predicted output and
# use a helper function, label_from_output, to get a text label for the class.
def label_from_output(output, output_labels):
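Only the signature of ``label_from_output`` appears in this hunk; its body lives outside the diff. As a hedged sketch of what such a helper typically does (pick the class with the highest score via ``Tensor.topk`` and map it back to a text label):

# Sketch only: a plausible body for label_from_output; the tutorial's actual
# implementation may differ. `output` is the (1, n_classes) log-probability
# tensor returned by the RNN, `output_labels` the list of class names.
def label_from_output(output, output_labels):
    top_n, top_i = output.topk(1)      # best score and its index
    label_i = top_i[0].item()
    return output_labels[label_i], label_i

# Usage sketch: label, index = label_from_output(rnn(line_tensor), alldata.labels_uniq)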
@@ -345,7 +336,7 @@ def train(rnn, training_data, n_epoch = 10, n_batch_size = 64, report_every = 50
# We can now train a dataset with mini batches for a specified number of epochs
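``train()`` is defined earlier in the file; only its signature is visible in the hunk header above. For orientation, here is a minimal sketch of a mini-batch training loop consistent with that signature, using ``nn.NLLLoss`` and plain SGD. The dataset item layout is an assumption, and the tutorial's real ``train()`` may differ in its details:

# Sketch only: a mini-batch training loop matching the signature
# train(rnn, training_data, n_epoch, n_batch_size, report_every, learning_rate).
# It is illustrative, not the tutorial's actual implementation.
import random
import torch
import torch.nn as nn

def train_sketch(rnn, training_data, n_epoch=10, n_batch_size=64,
                 report_every=50, learning_rate=0.2, criterion=nn.NLLLoss()):
    optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)
    all_losses = []
    rnn.train()
    for epoch in range(1, n_epoch + 1):
        indices = list(range(len(training_data)))
        random.shuffle(indices)
        epoch_loss = 0.0
        # walk over the shuffled examples one mini-batch at a time
        for start_i in range(0, len(indices), n_batch_size):
            batch = indices[start_i:start_i + n_batch_size]
            optimizer.zero_grad()
            batch_loss = 0
            for i in batch:
                # assumption: each item yields a class-index tensor and a
                # one-hot line tensor; adapt to the dataset's __getitem__
                label_tensor, line_tensor = training_data[i]
                output = rnn(line_tensor)              # (1, n_classes) log-probs
                batch_loss = batch_loss + criterion(output, label_tensor)
            batch_loss = batch_loss / len(batch)
            batch_loss.backward()
            optimizer.step()
            epoch_loss += batch_loss.item()
        all_losses.append(epoch_loss)
        if epoch % report_every == 0:
            print(f"epoch {epoch}: loss {epoch_loss:.4f}")
    return all_losses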
start = time.time()
- all_losses = train(rnn, train_set, n_epoch = 10, learning_rate = 0.2, report_every = 1)
+ all_losses = train(rnn, train_set, n_epoch = 13, learning_rate = 0.2, report_every = 1)
end = time.time()
print(f"training took {end - start}s")
@@ -429,9 +420,9 @@ def evaluate(rnn, testing_data, classes):
#
# - Get better results with a bigger and/or better shaped network
#
- # - Vary the hyperparameters to improve performance (e.g. 250 epochs, batch size, learning rate )
- # - Add more linear layers
+ # - Vary the hyperparameters to improve performance (e.g. change the number of epochs, batch size, learning rate)
# - Try the ``nn.LSTM`` and ``nn.GRU`` layers
+ # - Change the size of the layers (e.g. fewer or more hidden nodes, additional linear layers)
# - Combine multiple of these RNNs as a higher level network
#
# - Try with a different dataset of line -> label, for example: