-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
123 lines (59 loc) · 3.14 KB
/
model.py
File metadata and controls
123 lines (59 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import torch
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
class EncoderCNN(nn.Module):
    """CNN encoder: a frozen pretrained ResNet-50 backbone followed by a
    trainable linear projection into the caption-embedding space.

    Args:
        embed_size: dimensionality of the output feature embedding.
    """

    def __init__(self, embed_size):
        super().__init__()
        resnet = models.resnet50(pretrained=True)
        # Freeze the backbone: only the projection and its batch norm train.
        for param in resnet.parameters():
            param.requires_grad_(False)
        # Drop the final fully-connected classifier; keep everything up to
        # (and including) the global average pool.
        modules = list(resnet.children())[:-1]
        self.resnet = nn.Sequential(*modules)
        self.embed = nn.Linear(resnet.fc.in_features, embed_size)
        # Batch-normalize projected features to stabilize decoder training.
        self.bn1 = nn.BatchNorm1d(num_features=embed_size)

    def forward(self, images):
        """Encode a batch of images into embedding vectors.

        Args:
            images: image tensor of shape (batch, 3, H, W) — sized for
                ResNet input (224x224 typical; TODO confirm with the loader).

        Returns:
            Feature tensor of shape (batch, embed_size).
        """
        features = self.resnet(images)
        features = features.view(features.size(0), -1)  # flatten pooled maps
        features = self.embed(features)
        features = self.bn1(features)
        return features
class DecoderRNN(nn.Module):
    """LSTM decoder that produces vocabulary scores for caption tokens.

    Args:
        embed_size: size of word embeddings (must match the encoder's
            feature embedding size).
        hidden_size: LSTM hidden-state size.
        vocab_size: number of tokens in the vocabulary.
        num_layers: number of stacked LSTM layers (default 1).
    """

    def __init__(self, embed_size, hidden_size, vocab_size, num_layers=1):
        super().__init__()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.num_layers = num_layers
        # BUG FIX: num_layers was hard-coded to 1 here, silently ignoring
        # the constructor argument. Pass it through instead.
        self.lstm = nn.LSTM(embed_size, hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.lin = nn.Linear(in_features=hidden_size, out_features=vocab_size)
        self.embedding_words = nn.Embedding(vocab_size, embed_size)

    def forward(self, features, captions):
        """Teacher-forced decoding over a batch of captions.

        Args:
            features: image features, shape (batch, embed_size).
            captions: token ids, shape (batch, seq_len). The last token is
                dropped from the input because there is no target after it.

        Returns:
            Raw (unnormalized) vocabulary scores of shape
            (batch, seq_len, vocab_size); pair with CrossEntropyLoss,
            which applies softmax internally.
        """
        captions = self.embedding_words(captions)
        # Image features act as the first "word" of the sequence.
        features = features.unsqueeze(1)  # (batch, 1, embed_size)
        inputs = torch.cat((features, captions[:, :-1]), dim=1)
        output, _ = self.lstm(inputs)
        return self.lin(output)

    def sample(self, inputs, states=None, max_len=20):
        """Greedily decode a caption from a pre-processed image tensor.

        Args:
            inputs: feature tensor of shape (1, 1, embed_size) — batch
                size must be 1 (``.item()`` below requires it).
            states: optional initial LSTM (h, c) state; defaults to zeros.
            max_len: number of tokens to generate.

        Returns:
            List of ``max_len`` predicted token ids (Python ints).
        """
        sentence = []
        for _ in range(max_len):
            output, states = self.lstm(inputs, states)
            scores = self.lin(output)              # (1, 1, vocab_size)
            _, index = torch.max(scores, dim=2)    # greedy argmax token
            sentence.append(index.item())
            # Feed the predicted token back in as the next input.
            inputs = self.embedding_words(index)
        return sentence