Skip to content

Commit c29590f

Browse files
committed
Enable separate checking of Flavio's approach to Latin morphology.
1 parent 64f5bc7 commit c29590f

File tree

1 file changed

+75
-66
lines changed

1 file changed

+75
-66
lines changed

udapi/block/ud/la/markfeatsbugs.py

Lines changed: 75 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,32 @@
1212

1313
class MarkFeatsBugs(udapi.block.ud.markfeatsbugs.MarkFeatsBugs):
1414

15+
def __init__(self, flavio=False, **kwargs):
    """
    Set up the ud.la.MarkFeatsBugs block.

    Args:
    flavio: switch selecting which feature inventory is checked.
        With a truthy value (e.g. flavio=1), the features defined by
        Flavio for the treebanks he maintains are accepted. When left
        at the default, a more conservative set of features and values
        is expected.
    kwargs: any other keyword arguments; they are forwarded unchanged
        to the parent block constructor.
    """
    # The parent constructor consumes all the generic block arguments.
    super().__init__(**kwargs)
    # The flag is stored as given and later tested only for truthiness.
    # NOTE(review): if scenario parameters arrive as strings, 'flavio=0'
    # would still be truthy -- confirm how the value is passed in.
    self.flavio = flavio
26+
1527
def process_node(self, node):
1628
# NOUNS ################################################################
1729
if node.upos == 'NOUN':
18-
self.check_required_features(node, ['Gender', 'Number', 'Case'])
19-
self.check_allowed_features(node, {
30+
rf = ['Gender', 'Number', 'Case']
31+
af = {
2032
'Gender': ['Masc', 'Fem', 'Neut'],
2133
'Number': ['Sing', 'Plur'],
2234
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
23-
'Foreign': ['Yes']})
35+
'Foreign': ['Yes']}
36+
if self.flavio:
37+
rf.append('InflClass')
38+
af['InflClass'] = ['IndEurA', 'IndEurO', 'IndEurX']
39+
self.check_required_features(node, rf)
40+
self.check_allowed_features(node, af)
2441
# PROPER NOUNS #########################################################
2542
elif node.upos == 'PROPN':
2643
self.check_required_features(node, ['Gender', 'Number', 'Case'])
@@ -32,13 +49,20 @@ def process_node(self, node):
3249
'Foreign': ['Yes']})
3350
# ADJECTIVES ###########################################################
3451
elif node.upos == 'ADJ':
35-
self.check_required_features(node, ['Gender', 'Number', 'Case', 'Degree'])
36-
self.check_allowed_features(node, {
52+
rf = ['Gender', 'Number', 'Case', 'Degree']
53+
af = {
3754
'Gender': ['Masc', 'Fem', 'Neut'],
3855
'Number': ['Sing', 'Plur'],
3956
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
40-
'Degree': ['Pos', 'Cmp', 'Sup'],
41-
'Foreign': ['Yes']})
57+
'Degree': ['Pos', 'Cmp', 'Sup', 'Abs'],
58+
'Foreign': ['Yes']}
59+
if self.flavio:
60+
# Flavio does not use Degree=Pos, hence Degree is not required.
61+
rf = [f for f in rf if f != 'Degree']
62+
rf.append('InflClass')
63+
af['InflClass'] = ['IndEurA', 'IndEurO', 'IndEurX']
64+
self.check_required_features(node, rf)
65+
self.check_allowed_features(node, af)
4266
# PRONOUNS #############################################################
4367
elif node.upos == 'PRON':
4468
self.check_required_features(node, ['PronType'])
@@ -81,13 +105,19 @@ def process_node(self, node):
81105
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
82106
})
83107
else:
84-
self.check_required_features(node, ['PronType', 'Gender', 'Number', 'Case'])
85-
self.check_allowed_features(node, {
108+
rf = ['PronType', 'Gender', 'Number', 'Case']
109+
af = {
86110
'PronType': ['Dem', 'Int', 'Rel', 'Ind', 'Neg', 'Tot', 'Emp'],
87111
'Gender': ['Masc', 'Fem', 'Neut'],
88112
'Number': ['Sing', 'Plur'],
89-
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
90-
})
113+
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']}
114+
if self.flavio:
115+
rf.append('InflClass')
116+
af['PronType'].append('Con')
117+
af['InflClass'] = ['LatPron']
118+
af['Form'] = ['Emp']
119+
self.check_required_features(node, rf)
120+
self.check_allowed_features(node, af)
91121
# NUMERALS #############################################################
92122
elif node.upos == 'NUM':
93123
self.check_required_features(node, ['NumType', 'NumForm'])
@@ -98,73 +128,52 @@ def process_node(self, node):
98128
'NumForm': ['Digit', 'Roman']
99129
})
100130
else:
101-
self.check_required_features(node, ['NumType', 'NumForm', 'Number', 'Case'])
131+
self.check_required_features(node, ['NumType', 'NumForm'])
102132
self.check_allowed_features(node, {
103133
'NumType': ['Card'],
104-
'NumForm': ['Word'],
105-
'Number': ['Plur'],
106-
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
134+
'NumForm': ['Word']
107135
})
108136
# VERBS AND AUXILIARIES ################################################
109137
elif re.match(r'^(VERB|AUX)$', node.upos):
110-
self.check_required_features(node, ['Aspect', 'VerbForm'])
111-
if node.feats['VerbForm'] == 'Inf':
112-
self.check_allowed_features(node, {
113-
'Aspect': ['Imp', 'Perf', 'Prosp'],
114-
'VerbForm': ['Inf'],
115-
'Polarity': ['Pos', 'Neg']
116-
})
117-
elif node.feats['VerbForm'] == 'Fin':
118-
if node.feats['Mood'] == 'Imp':
119-
self.check_required_features(node, ['Mood', 'Person', 'Number'])
120-
self.check_allowed_features(node, {
121-
'Aspect': ['Imp', 'Perf', 'Prosp'],
122-
'VerbForm': ['Fin'],
123-
'Mood': ['Imp'],
124-
'Person': ['1', '2', '3'],
125-
'Number': ['Sing', 'Plur'],
126-
'Polarity': ['Pos', 'Neg']
127-
})
128-
else: # indicative or subjunctive
129-
self.check_required_features(node, ['Mood', 'Voice', 'Tense', 'Person', 'Number'])
130-
self.check_allowed_features(node, {
131-
'Aspect': ['Imp', 'Perf', 'Prosp'],
132-
'VerbForm': ['Fin'],
133-
'Mood': ['Ind', 'Sub'],
134-
'Tense': ['Past', 'Imp', 'Pres', 'Fut'], # only in indicative
135-
'Voice': ['Act'],
136-
'Person': ['1', '2', '3'],
137-
'Number': ['Sing', 'Plur'],
138-
'Polarity': ['Pos', 'Neg']
139-
})
138+
rf = ['Aspect', 'VerbForm']
139+
af = {
140+
'Aspect': ['Imp', 'Perf', 'Prosp'],
141+
'VerbForm': ['Inf', 'Fin', 'Part', 'Vnoun'],
142+
'Polarity': ['Pos', 'Neg']}
143+
if node.feats['VerbForm'] == 'Fin':
144+
rf.extend(['Mood', 'Person', 'Number'])
145+
af['Mood'] = ['Ind', 'Sub', 'Imp']
146+
af['Person'] = ['1', '2', '3']
147+
af['Number'] = ['Sing', 'Plur']
148+
if re.match(r'^(Ind|Sub)$', node.feats['Mood']): # indicative or subjunctive
149+
rf.extend(['Voice', 'Tense'])
150+
af['Voice'] = ['Act', 'Pass']
151+
af['Tense'] = ['Past', 'Imp', 'Pres', 'Fut']
140152
elif node.feats['VerbForm'] == 'Part':
141-
self.check_required_features(node, ['Tense', 'Gender', 'Number', 'Voice'])
142-
self.check_allowed_features(node, {
143-
'Aspect': ['Imp', 'Perf', 'Prosp'],
144-
'VerbForm': ['Part'],
145-
'Tense': ['Past'],
146-
'Voice': ['Act'], # passive participle is ADJ, so we will not encounter it under VERB
147-
'Number': ['Sing', 'Plur'],
148-
'Gender': ['Masc', 'Fem', 'Neut'],
149-
'Polarity': ['Pos', 'Neg']
150-
})
153+
rf.extend(['Tense', 'Gender', 'Number', 'Voice'])
154+
af['Tense'] = ['Past']
155+
af['Voice'] = ['Act']
156+
af['Number'] = ['Sing', 'Plur']
157+
af['Gender'] = ['Masc', 'Fem', 'Neut']
151158
else: # verbal noun
152-
self.check_required_features(node, ['Tense', 'Number', 'Voice'])
153-
self.check_allowed_features(node, {
154-
'Aspect': ['Imp', 'Perf', 'Prosp'],
155-
'VerbForm': ['Vnoun'],
156-
'Tense': ['Past', 'Pres'],
157-
'Voice': ['Act'],
158-
'Number': ['Sing', 'Plur'],
159-
'Gender': ['Masc', 'Fem', 'Neut'], # annotated only in singular
160-
'Polarity': ['Pos', 'Neg']
161-
})
159+
rf.extend(['Tense', 'Voice'])
160+
af['Tense'] = ['Past', 'Pres']
161+
af['Voice'] = ['Act']
162+
af['Gender'] = ['Masc', 'Fem', 'Neut']
163+
if self.flavio:
164+
# Flavio has killed Tense in his treebanks.
165+
rf = [f for f in rf if f != 'Tense']
166+
# Flavio added InflClass but not everywhere, so it is not required.
167+
af['InflClass'] = ['LatA', 'LatAnom', 'LatE', 'LatI2', 'LatX']
168+
self.check_required_features(node, rf)
169+
self.check_allowed_features(node, af)
162170
# ADVERBS ##############################################################
163171
elif node.upos == 'ADV':
164172
if node.feats['PronType'] != '':
165173
# Pronominal adverbs are neither compared nor negated.
166174
self.check_allowed_features(node, {
167-
'PronType': ['Dem', 'Int', 'Rel', 'Ind', 'Neg', 'Tot']
175+
'PronType': ['Dem', 'Int', 'Rel', 'Ind', 'Neg', 'Tot'],
176+
'AdvType': ['Loc']
168177
})
169178
else:
170179
# The remaining adverbs are neither pronominal, nor compared or

0 commit comments

Comments
 (0)