Skip to content

Commit 6de0c78

Browse files
committed
ud.FixPunct should not fail by creating cycles
If node.form is one of the paired punctuation symbols (opening or closing, except the apostrophe) and check_paired_punct_upos==False, we treat that node as punctuation even if its upos is not PUNCT. Thus we should prevent these nodes from having any children as well.
1 parent 59f0fda commit 6de0c78

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

udapi/block/ud/fixpunct.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,25 @@ def __init__(self, check_paired_punct_upos=False, copy_to_enhanced=False, **kwar
6060
self.check_paired_punct_upos = check_paired_punct_upos
6161
self.copy_to_enhanced = copy_to_enhanced
6262

63+
def _is_punct(self, node):
    """Return True if `node` should be treated as punctuation.

    A node whose upos is 'PUNCT' always counts as punctuation.
    Otherwise the decision falls back to the word form, but only when
    `self.check_paired_punct_upos` is false: the node then counts as
    punctuation if its form is a key of PAIRED_PUNCT (an opening symbol)
    or one of its values (the matching closing symbol).
    The plain apostrophe "'" is never accepted by this form-based check.
    """
    if node.upos == 'PUNCT':
        return True

    # The form-based fallback is disabled when the UPOS check is strict,
    # and the apostrophe is explicitly excluded from it.
    if self.check_paired_punct_upos or node.form == "'":
        return False

    form = node.form
    return form in PAIRED_PUNCT or form in PAIRED_PUNCT.values()
73+
6374
def process_tree(self, root):
6475
# First, make sure no PUNCT has children.
6576
# This may introduce multiple subroots, which will be fixed later on
6677
# (avoiding the temporary creation of multiple subroots here would prevent fixing some errors).
6778
for node in root.descendants:
68-
while node.parent.upos == 'PUNCT':
79+
while self._is_punct(node.parent):
6980
node.parent = node.parent.parent
70-
81+
root.draw()
7182
# Second, fix paired punctuations: quotes and brackets, marking them in _punct_type.
7283
# This should be done before handling the subordinate punctuation,
7384
# in order to prevent non-projectivities e.g. in dot-before-closing-quote style sentences:
@@ -77,7 +88,7 @@ def process_tree(self, root):
7788
self._punct_type = [None] * (1 + len(root.descendants))
7889
for node in root.descendants:
7990
if self._punct_type[node.ord] != 'closing':
80-
closing_punct = PAIRED_PUNCT.get(node.form, None)
91+
closing_punct = PAIRED_PUNCT.get(node.form)
8192
if closing_punct is not None:
8293
self._fix_paired_punct(root, node, closing_punct)
8394

@@ -236,12 +247,11 @@ def _fix_pair(self, root, opening_node, closing_node):
236247
# they also must not cause non-projectivity of other relations. This could
237248
# happen if an outside node is attached to an inside node. To account for
238249
# this, mark the inside parent as a head, too.
239-
else:
240-
if node.parent > opening_node and node.parent < closing_node:
241-
if node.parent.upos == 'PUNCT':
242-
punct_heads.append(node.parent)
243-
else:
244-
heads.append(node.parent)
250+
elif node.parent > opening_node and node.parent < closing_node:
251+
if node.parent.upos == 'PUNCT':
252+
punct_heads.append(node.parent)
253+
else:
254+
heads.append(node.parent)
245255

246256
# Punctuation should not have children, but if there is no other head candidate,
247257
# let's break this rule.

0 commit comments

Comments
 (0)