-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy paththeir_code_food_it_classification.py
More file actions
162 lines (120 loc) · 4.86 KB
/
their_code_food_it_classification.py
File metadata and controls
162 lines (120 loc) · 4.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import torch
# Training hyperparameters used by the dataset and trainer set up further down.
BATCH_SIZE = 30        # batch size given to the training Dataset
EPOCHS = 30            # number of epochs given to the trainer
LEARNING_RATE = 0.03   # learning rate given to the trainer
SEED = 0               # seed given to the trainer
def read_data(filename):
    """Load a labelled sentence file into parallel label and sentence lists.

    Every non-blank line must start with a one-character numeric label;
    the remainder of the line is the sentence.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(labels, sentences)`` tuple. ``labels`` contains one
        two-element list ``[t, 1 - t]`` per line, where ``t`` is the float
        value of the line's first character. ``sentences`` contains the
        rest of each line with surrounding whitespace stripped.

    Raises:
        ValueError: If the first character of a non-blank line cannot be
            converted to a float.
    """
    labels, sentences = [], []
    # Explicit encoding so the result does not depend on the platform locale.
    with open(filename, encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no sample; skip them instead of failing on float('\n').
            if not line.strip():
                continue
            t = float(line[0])
            labels.append([t, 1 - t])
            sentences.append(line[1:].strip())
    return labels, sentences
# Read the train / dev / test files in that order; each read_data call
# returns a (labels, sentences) pair.
(
    (train_labels, train_data),
    (val_labels, val_data),
    (test_labels, test_data),
) = map(
    read_data,
    (
        'data/mc_train_data.txt',
        'data/mc_dev_data.txt',
        'data/mc_test_data.txt',
    ),
)
import os
# Smoke-test switch: when the TEST_NOTEBOOKS environment variable is a
# non-zero integer, every split is cut to its first two samples and
# training runs for a single epoch.
TESTING = int(os.environ.get('TEST_NOTEBOOKS', '0'))

if TESTING:
    train_labels = train_labels[:2]
    train_data = train_data[:2]
    val_labels = val_labels[:2]
    val_data = val_data[:2]
    test_labels = test_labels[:2]
    test_data = test_data[:2]
    EPOCHS = 1

# Bare expressions: evaluated and discarded, so they print nothing when
# this file is run as a script.
train_data[:5]
train_labels[:5]
from lambeq import BobcatParser
# Parse the sentences of each split into diagrams with one shared parser.
parser = BobcatParser(verbose='text')

train_diagrams, val_diagrams, test_diagrams = (
    parser.sentences2diagrams(split)
    for split in (train_data, val_data, test_data)
)
from lambeq.backend.tensor import Dim
from lambeq import AtomicType, SpiderAnsatz
# Ansatz assigning Dim(2) to both the noun and the sentence atomic type.
ansatz = SpiderAnsatz({
    AtomicType.NOUN: Dim(2),
    AtomicType.SENTENCE: Dim(2),
})

# Apply the ansatz to every diagram of each split.
train_circuits = list(map(ansatz, train_diagrams))
val_circuits = list(map(ansatz, val_diagrams))
test_circuits = list(map(ansatz, test_diagrams))

# Draw the first training circuit as a visual check.
train_circuits[0].draw()
from lambeq import PytorchModel
# The model is built from the circuits of all three splits together.
all_circuits = [*train_circuits, *val_circuits, *test_circuits]
model = PytorchModel.from_diagrams(all_circuits)
sig = torch.sigmoid


def accuracy(y_hat, y):
    """Return the fraction of label entries that the model gets right.

    Args:
        y_hat: Tensor of raw model outputs. A sigmoid is applied and the
            result is rounded to 0 or 1 before it is compared with ``y``.
        y: Tensor of target values to compare against, element by element.

    Returns:
        A zero-dimensional tensor holding the number of matching entries
        divided by the total number of compared entries.
    """
    matches = torch.eq(torch.round(sig(y_hat)), y)
    # Divide by the number of compared entries rather than a hard-coded
    # ``len(y) / 2``: the value is unchanged for two-column labels and
    # remains a proper fraction for any other label width.
    return torch.sum(matches) / matches.numel()


eval_metrics = {"acc": accuracy}
from lambeq import PytorchTrainer
# Trainer for `model`: BCE-with-logits loss, AdamW optimiser, and the
# accuracy metric evaluated on the training data as well.
trainer = PytorchTrainer(
    model=model,
    optimizer=torch.optim.AdamW,  # the optimiser class itself, not an instance
    learning_rate=LEARNING_RATE,
    loss_function=torch.nn.BCEWithLogitsLoss(),
    epochs=EPOCHS,
    seed=SEED,
    evaluate_functions=eval_metrics,
    evaluate_on_train=True,
    verbose='text',
)
from lambeq import Dataset
# Training data is batched with BATCH_SIZE; the validation dataset is
# created with shuffling turned off and no explicit batch size.
train_dataset = Dataset(train_circuits, train_labels, batch_size=BATCH_SIZE)
val_dataset = Dataset(val_circuits, val_labels, shuffle=False)

# Run training with eval_interval=1 and log_interval=5.
trainer.fit(
    train_dataset,
    val_dataset,
    eval_interval=1,
    log_interval=5,
)
import matplotlib.pyplot as plt
import numpy as np
# 2x2 grid: loss on the top row, accuracy on the bottom row; training set
# in the left column, development set in the right column.
fig1, ((ax_tl, ax_tr), (ax_bl, ax_br)) = plt.subplots(
    2, 2, sharey='row', figsize=(10, 6))

colours = iter(plt.rcParams['axes.prop_cycle'].by_key()['color'])
range_ = np.arange(1, trainer.epochs + 1)

# Top-left: training loss.
ax_tl.set_title('Training set')
ax_tl.set_ylabel('Loss')
ax_tl.plot(range_, trainer.train_epoch_costs, color=next(colours))

# Bottom-left: training accuracy.
ax_bl.set_xlabel('Epochs')
ax_bl.set_ylabel('Accuracy')
ax_bl.plot(range_, trainer.train_eval_results['acc'], color=next(colours))

# Top-right: development loss.
ax_tr.set_title('Development set')
ax_tr.plot(range_, trainer.val_costs, color=next(colours))

# Bottom-right: development accuracy.
ax_br.set_xlabel('Epochs')
ax_br.plot(range_, trainer.val_eval_results['acc'], color=next(colours))
# print test accuracy
# This is an inference-only pass, so autograd is disabled: no computation
# graph is recorded for the test circuits and the printed value is unchanged.
with torch.no_grad():
    test_acc = accuracy(model(test_circuits), torch.tensor(test_labels))
print('Test accuracy:', test_acc.item())
# class MyCustomModel(PytorchModel):
# def __init__(self):
# super().__init__()
# self.net = torch.nn.Linear(2, 2)
# def forward(self, input):
# """define a custom forward pass here"""
# preds = self.get_diagram_output(input)
# preds = self.net(preds.float())
# return preds
# custom_model = MyCustomModel.from_diagrams(all_circuits)
# custom_model_trainer = PytorchTrainer(
# model=custom_model,
# loss_function=torch.nn.BCEWithLogitsLoss(),
# optimizer=torch.optim.AdamW,
# learning_rate=LEARNING_RATE,
# epochs=EPOCHS,
# evaluate_functions=eval_metrics,
# evaluate_on_train=True,
# verbose='text',
# seed=SEED)
# custom_model_trainer.fit(train_dataset, val_dataset, log_interval=5)
# import matplotlib.pyplot as plt
# import numpy as np
# fig1, ((ax_tl, ax_tr), (ax_bl, ax_br)) = plt.subplots(2, 2, sharey='row', figsize=(10, 6))
# ax_tl.set_title('Training set')
# ax_tr.set_title('Development set')
# ax_bl.set_xlabel('Epochs')
# ax_br.set_xlabel('Epochs')
# ax_bl.set_ylabel('Accuracy')
# ax_tl.set_ylabel('Loss')
# colours = iter(plt.rcParams['axes.prop_cycle'].by_key()['color'])
# range_ = np.arange(1, trainer.epochs+1)
# ax_tl.plot(range_, custom_model_trainer.train_epoch_costs, color=next(colours))
# ax_bl.plot(range_, custom_model_trainer.train_eval_results['acc'], color=next(colours))
# ax_tr.plot(range_, custom_model_trainer.val_costs, color=next(colours))
# ax_br.plot(range_, custom_model_trainer.val_eval_results['acc'], color=next(colours))
# # print test accuracy
# test_acc = accuracy(custom_model(test_circuits), torch.tensor(test_labels))
# print('Test accuracy:', test_acc.item())