 from nlp_architect.utils.io import validate_existing_directory
 from nlp_architect.utils.text import SpacyInstance

+sep = os.sep
 PAD = "<pad>"
 SOS = "<sos>"
 UNK = "<unk>"
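For context on this hunk: `os.sep` is the platform's path separator (`/` on POSIX, `\` on Windows). Below is a minimal sketch of the two equivalent ways to build the paths touched by this PR; the literal value of `download_path` is illustrative, not from the PR:

```python
import os

download_path = "data"  # illustrative value, not from the PR

# Manual concatenation, as the PR does:
p1 = download_path + os.sep + "glove.6B.300d.txt"
# Equivalent and separator-agnostic:
p2 = os.path.join(download_path, "glove.6B.300d.txt")
assert p1 == p2  # holds when download_path has no trailing separator
```

Note that in the second changed line of the next hunk, `os.path.join` receives a single already-concatenated string, so the call is a pass-through; passing the components separately, as in `p2` above, would make the manual `sep` unnecessary.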
@@ -72,10 +73,10 @@ def get_glove_matrix(vocabulary_list, download_path):
     """
     Function to obtain preprocessed glove embeddings matrix
     """
-    save_file_name = download_path + "glove.trimmed.300"
+    save_file_name = download_path + sep + "glove.trimmed.300"
     if not os.path.exists(save_file_name + ".npz"):
         vocab_len = len(vocabulary_list)
-        glove_path = os.path.join(download_path + "glove.6B.300d.txt")
+        glove_path = os.path.join(download_path + sep + "glove.6B.300d.txt")
         glove_matrix = np.zeros((vocab_len, 300))
         count = 0
         with open(glove_path) as f:
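For readers without the full file, this is roughly what `get_glove_matrix` does: scan the full GloVe file once, keep only vectors for in-vocabulary words, and cache the result so the check on `save_file_name + ".npz"` above can skip the scan on later runs. A minimal self-contained sketch; only the file names come from the diff, the function body is an assumption:

```python
import os
import numpy as np

def trim_glove_sketch(vocabulary_list, download_path, dim=300):
    """Build a (len(vocabulary_list), dim) matrix of GloVe vectors for known words."""
    word_to_id = {word: i for i, word in enumerate(vocabulary_list)}
    matrix = np.zeros((len(vocabulary_list), dim))
    glove_path = os.path.join(download_path, "glove.6B.300d.txt")
    with open(glove_path, encoding="utf-8") as f:
        for line in f:
            # Each GloVe line is "<word> <v1> <v2> ... <v300>"
            parts = line.rstrip().split(" ")
            if parts[0] in word_to_id:
                matrix[word_to_id[parts[0]]] = np.asarray(parts[1:], dtype=np.float64)
    # savez_compressed appends ".npz", matching the existence check in the diff
    np.savez_compressed(os.path.join(download_path, "glove.trimmed.300"), glove=matrix)
    return matrix
```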
@@ -253,13 +254,13 @@ def get_ids_list(data_list, vocab):
     dev_para_ids = get_ids_list(dev_para, vocab_dict)
     dev_question_ids = get_ids_list(dev_question, vocab_dict)

-    final_data_dict = {"train.ids.context": train_para_ids,
-                       "train.ids.question": train_question_ids,
-                       "dev.ids.context": dev_para_ids,
-                       "dev.ids.question": dev_question_ids,
-                       "vocab.dat": vocab_list,
-                       "train.span": train_ans,
-                       "dev.span": dev_ans}
+    final_data_dict = {sep + "train.ids.context": train_para_ids,
+                       sep + "train.ids.question": train_question_ids,
+                       sep + "dev.ids.context": dev_para_ids,
+                       sep + "dev.ids.question": dev_question_ids,
+                       sep + "vocab.dat": vocab_list,
+                       sep + "train.span": train_ans,
+                       sep + "dev.span": dev_ans}

     print("writing data to files")
     write_to_file(final_data_dict, data_path)
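The leading `sep` now prepended to each key suggests `write_to_file` builds every output path as `data_path + key`, so the separator must live in the key once `data_path` no longer ends with one. Assuming that (the helper's body is not shown in this hunk), a sketch of the pattern:

```python
def write_to_file_sketch(data_dict, data_path):
    """Hypothetical stand-in for write_to_file: dump each value to data_path + key."""
    for file_suffix, lines in data_dict.items():
        # file_suffix starts with os.sep, so plain concatenation yields a full path
        with open(data_path + file_suffix, "w", encoding="utf-8") as fp:
            for line in lines:
                # Token-id lists are flattened to space-separated strings
                if isinstance(line, (list, tuple)):
                    line = " ".join(str(tok) for tok in line)
                fp.write(str(line) + "\n")
```

Under that assumption, a caller passing `data_path="out"` would produce files such as `out/train.ids.context`, which is why each key carries the separator.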