Skip to content

Commit 604ea47

Browse files
committed
Quote reserved characters for CoreNLP. (#16)
1 parent 19d3be1 commit 604ea47

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

src/parserindexer/corenlpparser.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# The following two lines make CoreNLP happy
55
reload(sys)
66
sys.setdefaultencoding('UTF8')
7+
import urllib
78
from parser import *
89
from journalparser import *
910
from pycorenlp import StanfordCoreNLP
@@ -35,6 +36,8 @@ def parse_names(self, text, meta):
3536
if text[0].isspace(): # dont strip white spaces
3637
text = '.' + text[1:]
3738

39+
# Quote (with percent-encoding) reserved characters in URL for CoreNLP
40+
text = urllib.quote(text)
3841
output = self.corenlp.annotate(text, properties=self.props)
3942
# flatten sentences and tokens
4043
tokenlists = [s['tokens'] for s in output['sentences']]

0 commit comments

Comments
 (0)