Skip to content

Commit 389dd4a

Browse files
committed
Handle documents where resourceName is a list instead of a string. (#16)
1 parent 7370e4c commit 389dd4a

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

src/parserindexer/json2brat.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,12 @@ def convert_json_to_brat(jsonfile, outdir):
2929

3030
# Output relevant annotations into a brat .ann file
3131
ners = d['metadata']['ner']
32-
outfn = os.path.join(outdir,
33-
d['metadata']['resourceName'][:-4] + '.ann')
32+
res_name = d['metadata']['resourceName']
33+
if type(res_name) == list:
34+
# Sometimes Tika returns this as something like
35+
# "resourceName": ["2005_1725.pdf", "High Quality.joboptions"]
36+
res_name = res_name[0]
37+
outfn = os.path.join(outdir, res_name[:-4] + '.ann')
3438
outf = io.open(outfn, 'w', encoding='utf8')
3539
print 'Writing to', outfn
3640
for (t, n) in enumerate(ners):

0 commit comments

Comments
 (0)