Skip to content

Commit c7a2557

Browse files
authored
Use all 3 datasets to build the vocabularies (#15)
Replaced hard cast to float32 which caused reloading of model to fail
1 parent dca0646 commit c7a2557

File tree

5 files changed

+16
-12
lines changed

5 files changed

+16
-12
lines changed

app/resources/parscit.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ def post(self):
4242
data = prepare_dataset([[[token] for token in tokens]],
4343
current_app.word_to_id,
4444
current_app.char_to_id,
45+
{},
4546
current_app.model.parameters['lower'],
4647
True)
4748

@@ -81,6 +82,7 @@ def post(self):
8182
data = prepare_dataset(tokens,
8283
current_app.word_to_id,
8384
current_app.char_to_id,
85+
{},
8486
current_app.model.parameters['lower'],
8587
True)
8688

model.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -302,8 +302,8 @@ def build(self,
302302
transitions = shared((n_tags + 2, n_tags + 2), 'transitions')
303303

304304
small = -1000
305-
b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
306-
e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
305+
b_s = np.array([[small] * n_tags + [0, small]]).astype(theano.config.floatX)
306+
e_s = np.array([[small] * n_tags + [small, 0]]).astype(theano.config.floatX)
307307
observations = T.concatenate(
308308
[tags_scores, small * T.ones((s_len, 2))],
309309
axis=1

optimization.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,7 @@ def rmsprop(self, cost, params, lr=0.001, rho=0.9, eps=1e-6):
154154
lr = theano.shared(np.float32(lr).astype(floatX))
155155

156156
gradients = self.get_gradients(cost, params)
157-
accumulators = [theano.shared(np.zeros_like(p.get_value()).astype(np.float32)) for p in params]
157+
accumulators = [theano.shared(np.zeros_like(p.get_value()).astype(floatX)) for p in params]
158158

159159
updates = []
160160

train.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@
138138

139139
# Initialize model
140140
model = Model(parameters=parameters, models_path=models_path)
141-
logging.info("Model location: %s" % model.model_path)
141+
logging.info("Model location: %s", model.model_path)
142142

143143
# Data parameters
144144
lower = parameters['lower']
@@ -155,22 +155,24 @@
155155
##update_tag_scheme(dev_sentences, tag_scheme)
156156
##update_tag_scheme(test_sentences, tag_scheme)
157157

158+
all_sentences = train_sentences + dev_sentences + test_sentences
159+
158160
# Create a dictionary / mapping of words
159161
# If we use pretrained embeddings, we add them to the dictionary.
160162
if parameters['pre_emb']:
161-
dico_words_train = word_mapping(train_sentences, lower)[0]
163+
dico_words_train = word_mapping(all_sentences, lower)[0]
162164
dico_words, word_to_id, id_to_word = augment_with_pretrained(
163165
dico_words_train.copy(),
164166
parameters['pre_emb'],
165167
None
166168
)
167169
else:
168-
dico_words, word_to_id, id_to_word = word_mapping(train_sentences, lower)
170+
dico_words, word_to_id, id_to_word = word_mapping(all_sentences, lower)
169171
dico_words_train = dico_words
170172

171173
# Create a dictionary and a mapping for words / POS tags / tags
172-
dico_chars, char_to_id, id_to_char = char_mapping(train_sentences)
173-
dico_tags, tag_to_id, id_to_tag = tag_mapping(train_sentences)
174+
dico_chars, char_to_id, id_to_char = char_mapping(all_sentences)
175+
dico_tags, tag_to_id, id_to_tag = tag_mapping(all_sentences)
174176

175177
# Index data
176178
train_data = prepare_dataset(
@@ -229,12 +231,12 @@
229231
logging.info("Score on dev: %.5f", dev_score)
230232
logging.info("Score on test: %.5f", test_score)
231233
if dev_score > best_dev:
234+
logging.info("New best score on dev: %f. (Previously: %f)", dev_score, best_dev)
232235
best_dev = dev_score
233-
logging.info("New best score on dev.")
234236
logging.info("Saving model to disk...")
235237
model.save()
236238
if test_score > best_test:
239+
logging.info("New best score on test: %f. (Previously: %f)", test_score, best_test)
237240
best_test = test_score
238-
logging.info("New best score on test.")
239-
logging.info("Epoch %i done. Average cost: %f" % (epoch, np.mean(epoch_costs)))
241+
logging.info("Epoch %i done. Average cost: %f", epoch, np.mean(epoch_costs))
240242
model.save()

utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def set_values(name, param, pretrained):
3838
)
3939
param.set_value(np.reshape(
4040
pretrained, param_value.shape
41-
).astype(np.float32))
41+
).astype(theano.config.floatX))
4242

4343

4444
def shared(shape, name):

0 commit comments

Comments
 (0)