Skip to content

Commit e00e7fe

Browse files
committed
ud.FixPunct: not all double quotation marks are punctuation
If the double quotation marks in 'buy 17" or 16" wheels' are tagged as NOUN, we should not treat them as punctuation.
1 parent 6de0c78 commit e00e7fe

File tree

1 file changed

+6
-5
lines changed

1 file changed

+6
-5
lines changed

udapi/block/ud/fixpunct.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,9 @@ def __init__(self, check_paired_punct_upos=False, copy_to_enhanced=False, **kwar
5050
Args:
5151
check_paired_punct_upos: fix paired punctuation tokens only if their UPOS=PUNCT.
5252
The default is false, which means that fixed punctuation is detected only
53-
based on the form with the exception of single quote / apostrophe character,
54-
which is frequently ambiguous, so UPOS=PUNCT is checked always.
53+
based on the form with the exception of single and double quote characters,
54+
which are frequently ambiguous*, so UPOS=PUNCT is checked always.
55+
*) A single quote can be an apostrophe. A double quote tagged as NOUN can be the inch symbol.
5556
copy_to_enhanced: for all PUNCT nodes, let the enhanced dependencies be the same
5657
as the basic dependencies.
5758
"""
@@ -65,7 +66,7 @@ def _is_punct(self, node):
6566
return True
6667
if self.check_paired_punct_upos:
6768
return False
68-
if node.form == "'":
69+
if node.form in "'\"":
6970
return False
7071
if node.form in PAIRED_PUNCT or node.form in PAIRED_PUNCT.values():
7172
return True
@@ -78,7 +79,7 @@ def process_tree(self, root):
7879
for node in root.descendants:
7980
while self._is_punct(node.parent):
8081
node.parent = node.parent.parent
81-
root.draw()
82+
8283
# Second, fix paired punctuations: quotes and brackets, marking them in _punct_type.
8384
# This should be done before handling the subordinate punctuation,
8485
# in order to prevent non-projectivities e.g. in dot-before-closing-quote style sentences:
@@ -214,7 +215,7 @@ def _causes_gap(self, node):
214215

215216
def _fix_paired_punct(self, root, opening_node, closing_punct):
216217
if (self.check_paired_punct_upos
217-
or opening_node.form == "'") and opening_node.upos != 'PUNCT':
218+
or opening_node.form in "'\"") and opening_node.upos != 'PUNCT':
218219
return
219220
nested_level = 0
220221
for node in root.descendants[opening_node.ord:]:

0 commit comments

Comments
 (0)