Skip to content

Commit 6e8afbd

Browse files
committed
Truncate hyphenated or underscore-joined components (like 'Fe-rich' or 'Mg_sulfate') to just the leading part ('Fe' or 'Mg').
1 parent 2cf0e85 commit 6e8afbd

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

src/parserindexer/json2csv.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,11 @@ def convert_json_to_csv(jsonfile):
5656
# expand the target name and skip the next relation
5757
start_target = int(r['target_ids'][0].split('_')[1])
5858
end_target = int(r['target_ids'][0].split('_')[2])
59+
# These are arrays, but for auto-annotations,
60+
# they will only ever have one item
5961
targ_name = r['target_names'][0]
62+
cont_name = r['cont_names'][0]
63+
6064
if start_target in skip_inds:
6165
continue
6266
next_rels = [r2 for r2 in rels if
@@ -76,14 +80,19 @@ def convert_json_to_csv(jsonfile):
7680
next_rel['target_names'][0]))
7781
targ_name += ' ' + next_rel['target_names'][0]
7882
skip_inds.append(start_next_target)
79-
83+
84+
# If cont_name is something like Fe-rich or Mg_sulfate,
85+
# only keep the first bit.
86+
if '-' in cont_name:
87+
cont_name = cont_name[:cont_name.find('-')]
88+
elif '_' in cont_name:
89+
cont_name = cont_name[:cont_name.find('_')]
90+
8091
outf.write(',%s,%s,%s,"%s"\n' %
8192
(docid,
82-
# These are arrays, but for auto-annotations,
83-
# they will only ever have one item
8493
#r['target_names'][0],
8594
targ_name,
86-
r['cont_names'][0],
95+
cont_name,
8796
r['sentence']))
8897
# build URL manually?
8998
pbar.update(i)

0 commit comments

Comments
 (0)