-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy path: language_model.py
More file actions
30 lines (26 loc) · 1.07 KB
/
language_model.py
File metadata and controls
30 lines (26 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import numpy as np
from segtok import tokenizer
import torch as th
from torch import nn
# Using a basic RNN/LSTM for Language modeling
class LanguageModel(nn.Module):
    """Word-level LSTM language model.

    Maps token ids to embeddings, runs them through a (possibly stacked)
    LSTM, and projects each timestep's hidden state to vocabulary logits.

    Args:
        vocab_size: number of distinct tokens in the vocabulary.
        rnn_size: embedding dimension and LSTM hidden size (shared).
        num_layers: number of stacked LSTM layers (default 1).
        dropout: dropout applied between stacked LSTM layers; has no
            effect when num_layers == 1 (PyTorch emits a warning then).
    """

    def __init__(self, vocab_size, rnn_size, num_layers=1, dropout=0):
        super().__init__()
        # Embedding table of shape [vocab_size, rnn_size]: maps each
        # token id to a dense vector of rnn_size dimensions.
        self.embedding = nn.Embedding(vocab_size, rnn_size)
        # LSTM with batch_first convention: inputs/outputs are
        # (batch, seq_len, rnn_size). Inter-layer dropout only applies
        # when num_layers > 1.
        self.lstm = nn.LSTM(
            input_size=rnn_size,
            hidden_size=rnn_size,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )
        # LSTM layer does not add dropout to the last hidden output.
        # Add this if you wish.
        # self.dropout = nn.Dropout(p=dropout)
        # Dense projection from the RNN hidden state to vocabulary logits.
        self.output = nn.Linear(rnn_size, vocab_size)

    def forward(self, x):
        """Compute per-timestep vocabulary logits.

        Args:
            x: LongTensor of token ids, shape (batch, seq_len).

        Returns:
            FloatTensor of logits, shape (batch, seq_len, vocab_size).
        """
        embeds = self.embedding(x)              # (batch, seq, rnn_size)
        lstm_out, _ = self.lstm(embeds)         # (batch, seq, rnn_size)
        logits = self.output(lstm_out)          # (batch, seq, vocab_size)
        return logits