|
138 | 138 |
|
139 | 139 | # Initialize model |
140 | 140 | model = Model(parameters=parameters, models_path=models_path) |
141 | | -logging.info("Model location: %s" % model.model_path) |
| 141 | +logging.info("Model location: %s", model.model_path) |
142 | 142 |
|
143 | 143 | # Data parameters |
144 | 144 | lower = parameters['lower'] |
|
155 | 155 | ##update_tag_scheme(dev_sentences, tag_scheme) |
156 | 156 | ##update_tag_scheme(test_sentences, tag_scheme) |
157 | 157 |
|
| 158 | +all_sentences = train_sentences + dev_sentences + test_sentences |
| 159 | + |
158 | 160 | # Create a dictionary / mapping of words |
159 | 161 | # If we use pretrained embeddings, we add them to the dictionary. |
160 | 162 | if parameters['pre_emb']: |
161 | | - dico_words_train = word_mapping(train_sentences, lower)[0] |
| 163 | + dico_words_train = word_mapping(all_sentences, lower)[0] |
162 | 164 | dico_words, word_to_id, id_to_word = augment_with_pretrained( |
163 | 165 | dico_words_train.copy(), |
164 | 166 | parameters['pre_emb'], |
165 | 167 | None |
166 | 168 | ) |
167 | 169 | else: |
168 | | - dico_words, word_to_id, id_to_word = word_mapping(train_sentences, lower) |
| 170 | + dico_words, word_to_id, id_to_word = word_mapping(all_sentences, lower) |
169 | 171 | dico_words_train = dico_words |
170 | 172 |
|
171 | 173 | # Create a dictionary and a mapping for words / POS tags / tags |
172 | | -dico_chars, char_to_id, id_to_char = char_mapping(train_sentences) |
173 | | -dico_tags, tag_to_id, id_to_tag = tag_mapping(train_sentences) |
| 174 | +dico_chars, char_to_id, id_to_char = char_mapping(all_sentences) |
| 175 | +dico_tags, tag_to_id, id_to_tag = tag_mapping(all_sentences) |
174 | 176 |
|
175 | 177 | # Index data |
176 | 178 | train_data = prepare_dataset( |
|
229 | 231 | logging.info("Score on dev: %.5f", dev_score) |
230 | 232 | logging.info("Score on test: %.5f", test_score) |
231 | 233 | if dev_score > best_dev: |
| 234 | + logging.info("New best score on dev: %f. (Previously: %f)", dev_score, best_dev) |
232 | 235 | best_dev = dev_score |
233 | | - logging.info("New best score on dev.") |
234 | 236 | logging.info("Saving model to disk...") |
235 | 237 | model.save() |
236 | 238 | if test_score > best_test: |
| 239 | + logging.info("New best score on test: %f. (Previously: %f)", test_score, best_test) |
237 | 240 | best_test = test_score |
238 | | - logging.info("New best score on test.") |
239 | | - logging.info("Epoch %i done. Average cost: %f" % (epoch, np.mean(epoch_costs))) |
| 241 | + logging.info("Epoch %i done. Average cost: %f", epoch, np.mean(epoch_costs)) |
240 | 242 | model.save() |
0 commit comments