@@ -60,14 +60,25 @@ def __init__(self, check_paired_punct_upos=False, copy_to_enhanced=False, **kwar
60
60
self .check_paired_punct_upos = check_paired_punct_upos
61
61
self .copy_to_enhanced = copy_to_enhanced
62
62
63
+ def _is_punct (self , node ):
64
+ if node .upos == 'PUNCT' :
65
+ return True
66
+ if self .check_paired_punct_upos :
67
+ return False
68
+ if node .form == "'" :
69
+ return False
70
+ if node .form in PAIRED_PUNCT or node .form in PAIRED_PUNCT .values ():
71
+ return True
72
+ return False
73
+
63
74
def process_tree (self , root ):
64
75
# First, make sure no PUNCT has children.
65
76
# This may introduce multiple subroots, which will be fixed later on
66
77
# (preventing to temporarily create multiple subroots here would prevent fixing some errors).
67
78
for node in root .descendants :
68
- while node .parent . upos == 'PUNCT' :
79
+ while self . _is_punct ( node .parent ) :
69
80
node .parent = node .parent .parent
70
-
81
+ root . draw ()
71
82
# Second, fix paired punctuations: quotes and brackets, marking them in _punct_type.
72
83
# This should be done before handling the subordinate punctuation,
73
84
# in order to prevent non-projectivities e.g. in dot-before-closing-quote style sentences:
@@ -77,7 +88,7 @@ def process_tree(self, root):
77
88
self ._punct_type = [None ] * (1 + len (root .descendants ))
78
89
for node in root .descendants :
79
90
if self ._punct_type [node .ord ] != 'closing' :
80
- closing_punct = PAIRED_PUNCT .get (node .form , None )
91
+ closing_punct = PAIRED_PUNCT .get (node .form )
81
92
if closing_punct is not None :
82
93
self ._fix_paired_punct (root , node , closing_punct )
83
94
@@ -236,12 +247,11 @@ def _fix_pair(self, root, opening_node, closing_node):
236
247
# they also must not cause non-projectivity of other relations. This could
237
248
# happen if an outside node is attached to an inside node. To account for
238
249
# this, mark the inside parent as a head, too.
239
- else :
240
- if node .parent > opening_node and node .parent < closing_node :
241
- if node .parent .upos == 'PUNCT' :
242
- punct_heads .append (node .parent )
243
- else :
244
- heads .append (node .parent )
250
+ elif node .parent > opening_node and node .parent < closing_node :
251
+ if node .parent .upos == 'PUNCT' :
252
+ punct_heads .append (node .parent )
253
+ else :
254
+ heads .append (node .parent )
245
255
246
256
# Punctuation should not have children, but if there is no other head candidate,
247
257
# let's break this rule.
0 commit comments