-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathget_dir_idxes.py
More file actions
20 lines (17 loc) · 809 Bytes
/
get_dir_idxes.py
File metadata and controls
20 lines (17 loc) · 809 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import json
import os
# For each subset, look up every line of
# ./levyholt_files/dir_files/with_original/{subset}.txt in
# ./full_files/with_original/{subset}.txt and write the matching 0-based line
# indices, in dir-file order, to {subset}_idxes.json beside the dir file.
#
# Raises AssertionError if the full file contains an empty line, or if a line
# of the dir file does not occur in the full file.
for subset in ['dev', 'test']:
    # Line text (without its line terminator) -> 0-based index in the full
    # file. If the same text occurs more than once, the last occurrence wins.
    full_lines = {}
    dir_idxes = []

    with open(f'./full_files/with_original/{subset}.txt', 'r', encoding='utf8') as fp:
        for lidx, raw_line in enumerate(fp):
            # Compare on content only: a final line that lacks a trailing
            # newline must still match the same text followed by a newline in
            # the other file, and must not be mistaken for an empty line.
            line = raw_line.rstrip('\n')
            if not line:
                raise AssertionError(f'Empty line at {lidx} in {subset}')
            full_lines[line] = lidx

    with open(f"./levyholt_files/dir_files/with_original/{subset}.txt", 'r', encoding='utf8') as fp:
        for raw_line in fp:
            line = raw_line.rstrip('\n')
            if line not in full_lines:
                raise AssertionError(f'{line} not in {subset}')
            dir_idxes.append(full_lines[line])

    with open(f'./levyholt_files/dir_files/with_original/{subset}_idxes.json', 'w', encoding='utf8') as fp:
        json.dump(dir_idxes, fp, ensure_ascii=False, indent=4)