Skip to content

Commit 74c109d

Browse files
committed
Save the multi-word names out; sort names by document position.
1 parent b42442f commit 74c109d

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

src/parserindexer/corenlpparser.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ def parse_names(self, text, meta):
45 | 45
# Handle multi-word tokens:
46 | 46
# Merge any adjacent Target tokens, if of the same type and
47 | 47
# separated by a space, into one span.
48+
names.sort(key=lambda x: int(x['begin']))
48 | 49
new_names = []
49 | 50
skip_names = []
50 | 51
for n in names:
@@ -68,7 +69,7 @@ def parse_names(self, text, meta):
68 | 69
print('%d -> %d NERs' % (len(names), len(new_names)))
69 | 70

70 | 71
if names:
71-
meta['ner'] = names
72+
meta['ner'] = new_names
72 | 73
meta['X-Parsed-By'].append(CoreNLPParser.CORENLP_PARSER)
73 | 74
meta['sentences'] = output['sentences']
74 | 75
return meta

0 commit comments

Comments
 (0)