Skip to content

Commit 8b05a49

Browse files
committed
Adjusted Latin feature rules.
1 parent c29590f commit 8b05a49

File tree

1 file changed

+121
-88
lines changed

1 file changed

+121
-88
lines changed

udapi/block/ud/la/markfeatsbugs.py

Lines changed: 121 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -25,121 +25,146 @@ def __init__(self, flavio=False, **kwargs):
2525
self.flavio = flavio
2626

2727
def process_node(self, node):
28+
rf = []
29+
af = {}
2830
# NOUNS ################################################################
2931
if node.upos == 'NOUN':
30-
rf = ['Gender', 'Number', 'Case']
32+
if not node.feats['Abbr'] == 'Yes':
33+
rf = ['Gender', 'Number', 'Case']
3134
af = {
3235
'Gender': ['Masc', 'Fem', 'Neut'],
3336
'Number': ['Sing', 'Plur'],
3437
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
38+
'Degree': ['Dim'],
39+
'Abbr': ['Yes'],
3540
'Foreign': ['Yes']}
3641
if self.flavio:
37-
rf.append('InflClass')
38-
af['InflClass'] = ['IndEurA', 'IndEurO', 'IndEurX']
42+
# Flavio added InflClass but not everywhere, so it is not required.
43+
af['InflClass'] = ['IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
3944
self.check_required_features(node, rf)
4045
self.check_allowed_features(node, af)
4146
# PROPER NOUNS #########################################################
4247
elif node.upos == 'PROPN':
43-
self.check_required_features(node, ['Gender', 'Number', 'Case'])
44-
self.check_allowed_features(node, {
48+
if not node.feats['Abbr'] == 'Yes':
49+
rf = ['Gender', 'Number', 'Case']
50+
af = {
4551
'Gender': ['Masc', 'Fem', 'Neut'],
4652
'Number': ['Sing', 'Plur'],
4753
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
4854
'NameType': ['Giv', 'Sur', 'Geo'],
49-
'Foreign': ['Yes']})
55+
'Abbr': ['Yes'],
56+
'Foreign': ['Yes']}
57+
if self.flavio:
58+
# Flavio added InflClass but not everywhere, so it is not required.
59+
af['InflClass'] = ['IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
60+
af['Proper'] = ['Yes']
61+
self.check_required_features(node, rf)
62+
self.check_allowed_features(node, af)
5063
# ADJECTIVES ###########################################################
5164
elif node.upos == 'ADJ':
52-
rf = ['Gender', 'Number', 'Case', 'Degree']
65+
if not node.feats['Abbr'] == 'Yes':
66+
rf = ['Gender', 'Number', 'Case', 'Degree']
5367
af = {
68+
'NumType': ['Ord', 'Dist'],
5469
'Gender': ['Masc', 'Fem', 'Neut'],
5570
'Number': ['Sing', 'Plur'],
5671
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
5772
'Degree': ['Pos', 'Cmp', 'Sup', 'Abs'],
73+
'Abbr': ['Yes'],
5874
'Foreign': ['Yes']}
5975
if self.flavio:
6076
# Flavio does not use Degree=Pos, hence Degree is not required.
6177
rf = [f for f in rf if f != 'Degree']
62-
rf.append('InflClass')
63-
af['InflClass'] = ['IndEurA', 'IndEurO', 'IndEurX']
78+
# Flavio added InflClass but not everywhere, so it is not required.
79+
af['InflClass'] = ['IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
80+
af['Compound'] = ['Yes']
81+
af['Proper'] = ['Yes']
6482
self.check_required_features(node, rf)
6583
self.check_allowed_features(node, af)
6684
# PRONOUNS #############################################################
6785
elif node.upos == 'PRON':
68-
self.check_required_features(node, ['PronType'])
86+
rf = ['PronType', 'Case']
87+
af = {
88+
'PronType': ['Prs', 'Rel', 'Ind'],
89+
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
90+
}
6991
if node.feats['PronType'] == 'Prs':
70-
if node.feats['Reflex'] == 'Yes':
71-
self.check_required_features(node, ['PronType', 'Reflex', 'Case'])
72-
self.check_allowed_features(node, {
73-
'PronType': ['Prs'],
74-
'Reflex': ['Yes'],
75-
'Case': ['Gen', 'Dat', 'Acc', 'Loc', 'Abl']
76-
})
77-
else: # not reflexive
78-
if node.feats['Person'] == '3': # on, ona, ono, oni, ony
79-
self.check_required_features(node, ['PronType', 'Person', 'Gender', 'Number', 'Case'])
80-
self.check_allowed_features(node, {
81-
'PronType': ['Prs'],
82-
'Person': ['3'],
83-
'Gender': ['Masc', 'Fem', 'Neut'],
84-
'Number': ['Sing', 'Plur'],
85-
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
86-
})
87-
else: # 1st and 2nd person do not have gender: já, ty
88-
self.check_required_features(node, ['PronType', 'Person', 'Number', 'Case'])
89-
self.check_allowed_features(node, {
90-
'PronType': ['Prs'],
91-
'Person': ['1', '2'],
92-
'Number': ['Sing', 'Plur'],
93-
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
94-
})
92+
af['Reflex'] = ['Yes']
93+
if node.feats['Reflex'] == 'Yes': # seipsum, se
94+
# seipsum has gender and number but se does not, so it is not required
95+
af['Gender'] = ['Masc']
96+
af['Number'] = ['Sing']
97+
af['Person'] = ['3']
98+
af['Case'] = ['Gen', 'Dat', 'Acc', 'Loc', 'Abl']
99+
else: # not reflexive: ego, tu, is, nos
100+
rf.extend(['Person', 'Number'])
101+
af['Person'] = ['1', '2', '3']
102+
af['Number'] = ['Sing', 'Plur']
103+
# 1st and 2nd person do not have gender
104+
if node.feats['Person'] == '3': # is, id
105+
rf.append('Gender')
106+
af['Gender'] = ['Masc', 'Fem', 'Neut']
107+
elif re.match(r'^(Rel|Ind)$', node.feats['PronType']):
108+
rf.extend(['Gender', 'Number'])
109+
af['Gender'] = ['Masc', 'Fem', 'Neut']
110+
af['Number'] = ['Sing', 'Plur']
111+
if self.flavio:
112+
# Flavio added InflClass but not everywhere, so it is not required.
113+
af['InflClass'] = ['LatAnom', 'LatPron']
114+
self.check_required_features(node, rf)
115+
self.check_allowed_features(node, af)
95116
# DETERMINERS ##########################################################
96117
elif node.upos == 'DET':
97-
if node.feats['Poss'] == 'Yes': # 'můj', 'tvůj', 'svůj'
98-
self.check_required_features(node, ['PronType', 'Poss', 'Person', 'Gender', 'Number', 'Case'])
99-
self.check_allowed_features(node, {
100-
'PronType': ['Prs'],
101-
'Poss': ['Yes'],
102-
'Person': ['1', '2', '3'],
103-
'Gender': ['Masc', 'Fem', 'Neut'],
104-
'Number': ['Sing', 'Plur'],
105-
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
106-
})
118+
rf = ['PronType', 'Gender', 'Number', 'Case']
119+
af = {
120+
'Gender': ['Masc', 'Fem', 'Neut'],
121+
'Number': ['Sing', 'Plur'],
122+
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']}
123+
if node.feats['Poss'] == 'Yes': # 'meus', 'tuus', 'suus', 'noster'
124+
rf.extend(['Poss', 'Person[psor]'])
125+
af['PronType'] = ['Prs']
126+
af['Poss'] = 'Yes'
127+
af['Person[psor]'] = ['1', '2', '3']
128+
af['Reflex'] = ['Yes']
129+
# The possessor's number is distinguished in the first and second person (meus vs. noster) but not in the third person (suus).
130+
if node.feats['Person[psor]'] != '3':
131+
rf.append('Number[psor]')
132+
af['Number[psor]'] = ['Sing', 'Plur']
107133
else:
108-
rf = ['PronType', 'Gender', 'Number', 'Case']
109-
af = {
110-
'PronType': ['Dem', 'Int', 'Rel', 'Ind', 'Neg', 'Tot', 'Emp'],
111-
'Gender': ['Masc', 'Fem', 'Neut'],
112-
'Number': ['Sing', 'Plur'],
113-
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']}
114-
if self.flavio:
115-
rf.append('InflClass')
116-
af['PronType'].append('Con')
117-
af['InflClass'] = ['LatPron']
118-
af['Form'] = ['Emp']
119-
self.check_required_features(node, rf)
120-
self.check_allowed_features(node, af)
134+
af['PronType'] = ['Dem', 'Rel', 'Ind', 'Tot', 'Con']
135+
if self.flavio:
136+
# Flavio added InflClass but not everywhere, so it is not required.
137+
af['InflClass'] = ['IndEurA', 'IndEurI', 'IndEurO', 'LatPron']
138+
af['Form'] = ['Emp']
139+
self.check_required_features(node, rf)
140+
self.check_allowed_features(node, af)
121141
# NUMERALS #############################################################
122142
elif node.upos == 'NUM':
123-
self.check_required_features(node, ['NumType', 'NumForm'])
143+
rf = ['NumType', 'NumForm']
144+
af = {
145+
'NumType': ['Card'],
146+
'NumForm': ['Word', 'Roman', 'Digit']
147+
}
124148
# Arabic digits and Roman numerals do not have inflection features.
125-
if re.match(r'^(Digit|Roman)$', node.feats['NumForm']):
126-
self.check_allowed_features(node, {
127-
'NumType': ['Card'],
128-
'NumForm': ['Digit', 'Roman']
129-
})
130-
else:
131-
self.check_required_features(node, ['NumType', 'NumForm'])
132-
self.check_allowed_features(node, {
133-
'NumType': ['Card'],
134-
'NumForm': ['Word']
135-
})
149+
if not re.match(r'^(Digit|Roman)$', node.feats['NumForm']):
150+
af['Gender'] = ['Masc', 'Fem', 'Neut']
151+
af['Number'] = ['Sing', 'Plur']
152+
af['Case'] = ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
153+
if self.flavio:
154+
# Flavio added InflClass but not everywhere, so it is not required.
155+
af['InflClass'] = ['IndEurA', 'IndEurI', 'IndEurO', 'LatPron']
156+
self.check_required_features(node, rf)
157+
self.check_allowed_features(node, af)
136158
# VERBS AND AUXILIARIES ################################################
137159
elif re.match(r'^(VERB|AUX)$', node.upos):
138-
rf = ['Aspect', 'VerbForm']
160+
rf = ['VerbForm']
139161
af = {
140-
'Aspect': ['Imp', 'Perf', 'Prosp'],
141162
'VerbForm': ['Inf', 'Fin', 'Part', 'Vnoun'],
142163
'Polarity': ['Pos', 'Neg']}
164+
# Main verbs have aspect but auxiliaries don't.
165+
if node.upos == 'VERB':
166+
rf.append('Aspect')
167+
af['Aspect'] = ['Imp', 'Perf', 'Prosp']
143168
if node.feats['VerbForm'] == 'Fin':
144169
rf.extend(['Mood', 'Person', 'Number'])
145170
af['Mood'] = ['Ind', 'Sub', 'Imp']
@@ -150,40 +175,48 @@ def process_node(self, node):
150175
af['Voice'] = ['Act', 'Pass']
151176
af['Tense'] = ['Past', 'Imp', 'Pres', 'Fut']
152177
elif node.feats['VerbForm'] == 'Part':
153-
rf.extend(['Tense', 'Gender', 'Number', 'Voice'])
178+
rf.extend(['Tense', 'Gender', 'Number', 'Voice', 'Case'])
154179
af['Tense'] = ['Past']
155-
af['Voice'] = ['Act']
180+
af['Voice'] = ['Act', 'Pass']
156181
af['Number'] = ['Sing', 'Plur']
157182
af['Gender'] = ['Masc', 'Fem', 'Neut']
158-
else: # verbal noun
183+
af['Case'] = ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
184+
af['Degree'] = ['Abs']
185+
elif node.feats['VerbForm'] == 'Vnoun':
159186
rf.extend(['Tense', 'Voice'])
160187
af['Tense'] = ['Past', 'Pres']
161-
af['Voice'] = ['Act']
188+
af['Voice'] = ['Act', 'Pass']
162189
af['Gender'] = ['Masc', 'Fem', 'Neut']
190+
# else: nothing to be added for VerbForm=Inf
163191
if self.flavio:
164192
# Flavio has killed Tense in his treebanks.
165193
rf = [f for f in rf if f != 'Tense']
166194
# Flavio added InflClass but not everywhere, so it is not required.
167195
af['InflClass'] = ['LatA', 'LatAnom', 'LatE', 'LatI2', 'LatX']
196+
if node.feats['VerbForm'] == 'Part':
197+
af['InflClass[nominal]'] = ['IndEurA', 'IndEurI', 'IndEurO']
168198
self.check_required_features(node, rf)
169199
self.check_allowed_features(node, af)
170200
# ADVERBS ##############################################################
171201
elif node.upos == 'ADV':
172-
if node.feats['PronType'] != '':
173-
# Pronominal adverbs are neither compared nor negated.
174-
self.check_allowed_features(node, {
175-
'PronType': ['Dem', 'Int', 'Rel', 'Ind', 'Neg', 'Tot'],
176-
'AdvType': ['Loc']
177-
})
178-
else:
179-
# The remaining adverbs are neither pronominal, nor compared or
180-
# negated.
181-
self.check_allowed_features(node, {})
202+
af = {
203+
'AdvType': ['Loc', 'Tim'],
204+
'PronType': ['Dem', 'Int', 'Rel', 'Ind', 'Neg', 'Tot', 'Con'],
205+
'Degree': ['Pos', 'Cmp', 'Sup', 'Abs']
206+
}
207+
if self.flavio:
208+
af['Compound'] = 'Yes'
209+
af['Form'] = 'Emp'
210+
self.check_allowed_features(node, af)
182211
# PARTICLES ############################################################
183212
elif node.upos == 'PART':
184-
self.check_allowed_features(node, {
213+
af = {
214+
'PartType': ['Int'],
185215
'Polarity': ['Neg']
186-
})
216+
}
217+
if self.flavio:
218+
af['Form'] = 'Emp'
219+
self.check_allowed_features(node, af)
187220
# THE REST: NO FEATURES ################################################
188221
else:
189222
self.check_allowed_features(node, {})

0 commit comments

Comments
 (0)