@@ -50,8 +50,9 @@ def __init__(self, check_paired_punct_upos=False, copy_to_enhanced=False, **kwar
50
50
Args:
51
51
check_paired_punct_upos: fix paired punctuation tokens only if their UPOS=PUNCT.
52
52
The default is false, which means that fixed punctuation is detected only
53
- based on the form with the exception of single quote / apostrophe character,
54
- which is frequently ambiguous, so UPOS=PUNCT is checked always.
53
+ based on the form with the exception of the single and double quote characters,
54
+ which are frequently ambiguous*, so UPOS=PUNCT is checked always.
55
+ *) Single quote can be an apostrophe. Double quote as a NOUN can be the inch symbol.
55
56
copy_to_enhanced: for all PUNCT nodes, let the enhanced dependencies be the same
56
57
as the basic dependencies.
57
58
"""
@@ -65,7 +66,7 @@ def _is_punct(self, node):
65
66
return True
66
67
if self .check_paired_punct_upos :
67
68
return False
68
- if node .form == "'" :
69
+ if node .form in "'\" " :
69
70
return False
70
71
if node .form in PAIRED_PUNCT or node .form in PAIRED_PUNCT .values ():
71
72
return True
@@ -78,7 +79,7 @@ def process_tree(self, root):
78
79
for node in root .descendants :
79
80
while self ._is_punct (node .parent ):
80
81
node .parent = node .parent .parent
81
- root . draw ()
82
+
82
83
# Second, fix paired punctuations: quotes and brackets, marking them in _punct_type.
83
84
# This should be done before handling the subordinate punctuation,
84
85
# in order to prevent non-projectivities e.g. in dot-before-closing-quote style sentences:
@@ -214,7 +215,7 @@ def _causes_gap(self, node):
214
215
215
216
def _fix_paired_punct (self , root , opening_node , closing_punct ):
216
217
if (self .check_paired_punct_upos
217
- or opening_node .form == "'" ) and opening_node .upos != 'PUNCT' :
218
+ or opening_node .form in "'\" " ) and opening_node .upos != 'PUNCT' :
218
219
return
219
220
nested_level = 0
220
221
for node in root .descendants [opening_node .ord :]:
0 commit comments