Skip to content

Commit fde163c

Browse files
author
Federica Gamba (PhD
committed
further adjusted Latin feature rules
1 parent 9f1c9ad commit fde163c

File tree

1 file changed

+78
-44
lines changed

1 file changed

+78
-44
lines changed

udapi/block/ud/la/markfeatsbugs.py

Lines changed: 78 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -29,19 +29,19 @@ def process_node(self, node):
2929
af = {}
3030
# NOUNS ################################################################
3131
if node.upos == 'NOUN':
32-
if not node.feats['Abbr'] == 'Yes' or node.feats['Case']: # abbreviated or indeclinable nouns
32+
if node.feats['Case'] and not node.feats['Abbr'] == 'Yes': # not for abbreviated or indeclinable nouns
3333
rf = ['Gender', 'Number', 'Case']
3434
af = {
3535
'Gender': ['Masc', 'Fem', 'Neut'],
3636
'Number': ['Sing', 'Plur'],
3737
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
3838
'Degree': ['Dim'],
3939
'Abbr': ['Yes'],
40-
'Foreign': ['Yes']}
40+
'Foreign': ['Yes'],
41+
'VerbForm': ['Part']}
4142
if self.flavio:
4243
# Flavio added InflClass but not everywhere, so it is not required.
4344
af['InflClass'] = ['IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
44-
af['VerbForm'] = ['Part']
4545
af['Proper'] = ['Yes']
4646
af['Compound'] = ['Yes']
4747
af['NameType'] = ['Ast', 'Cal', 'Com', 'Geo', 'Giv', 'Let', 'Lit', 'Met', 'Nat', 'Rel', 'Sur', 'Oth']
@@ -76,14 +76,12 @@ def process_node(self, node):
7676
'Degree': ['Cmp', 'Sup', 'Abs'],
7777
'Abbr': ['Yes'],
7878
'Foreign': ['Yes'],
79-
'Polarity': ['Neg']}
79+
'Polarity': ['Neg'],
80+
'VerbForm': ['Part']}
8081
if self.flavio:
81-
# Flavio does not use Degree=Pos, hence Degree is not required.
82-
# rf = [f for f in rf if f != 'Degree']
8382
# Flavio added InflClass but not everywhere, so it is not required.
8483
af['InflClass'] = ['IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
8584
af['Compound'] = ['Yes']
86-
af['VerbForm'] = ['Part']
8785
af['Proper'] = ['Yes']
8886
af['Degree'].append('Dim')
8987
af['NameType'] = ['Ast', 'Cal', 'Com', 'Geo', 'Giv', 'Let', 'Lit', 'Met', 'Nat', 'Rel', 'Sur', 'Oth']
@@ -93,15 +91,16 @@ def process_node(self, node):
9391
elif node.upos == 'PRON':
9492
rf = ['PronType', 'Case']
9593
af = {
96-
'PronType': ['Prs', 'Rel', 'Ind', 'Int', 'Rcp'],
97-
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
94+
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
95+
'Proper': ['Yes'],
96+
'Compound': ['Yes'],
97+
'Polarity': ['Neg']
9898
}
9999
if node.feats['PronType'] == 'Prs':
100100
af['Reflex'] = ['Yes']
101101
if node.feats['Reflex'] == 'Yes': # seipsum, se
102102
rf.extend(['Person'])
103103
# seipsum has gender and number but se does not, so they are not required
104-
# TODO: seipsum in ITTB, but why lemma seipsum instead of seipse?
105104
af['Gender'] = ['Masc', 'Fem', 'Neut']
106105
af['Number'] = ['Sing', 'Plur']
107106
af['Person'] = ['3']
@@ -122,6 +121,19 @@ def process_node(self, node):
122121
rf = [f for f in rf if f != 'Case']
123122
af['Gender'] = ['Masc', 'Fem', 'Neut']
124123
af['Number'] = ['Sing', 'Plur']
124+
# lexical check of PronTypes
125+
af['PronType'] = []
126+
if node.lemma in ['is', 'ego', 'tu', 'sui', 'seipsum', 'nos', 'uos', 'vos', 'tumetipse', 'nosmetipse']:
127+
af['PronType'].append('Prs')
128+
elif node.lemma in ['quis', 'aliquis', 'nihil', 'nemo', 'quivis']:
129+
af['PronType'].append('Ind')
130+
elif node.lemma in ['inuicem', 'invicem']:
131+
af['PronType'].append('Rcp')
132+
rf.remove('Case')
133+
elif node.lemma in ['quicumque', 'qui', 'quisquis']:
134+
af['PronType'].append('Rel')
135+
if node.lemma in ['qui', 'quis', 'quisnam', 'ecquis']:
136+
af['PronType'].append('Int')
125137
if self.flavio:
126138
# Flavio added InflClass but not everywhere, so it is not required.
127139
af['InflClass'] = ['LatAnom', 'LatPron']
@@ -140,7 +152,9 @@ def process_node(self, node):
140152
'Number': ['Sing', 'Plur'],
141153
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
142154
'Degree': ['Cmp', 'Abs', 'Sup'],
143-
'Polarity': ['Neg']
155+
'Polarity': ['Neg'],
156+
'Proper': ['Yes'],
157+
'PronType': []
144158
}
145159
if node.feats['Poss'] == 'Yes': # 'meus', 'tuus', 'suus', 'noster'
146160
rf.extend(['Poss', 'Person[psor]'])
@@ -152,8 +166,24 @@ def process_node(self, node):
152166
if node.feats['Person[psor]'] != '3':
153167
rf.append('Number[psor]')
154168
af['Number[psor]'] = ['Sing', 'Plur']
155-
else:
156-
af['PronType'] = ['Dem', 'Rel', 'Ind', 'Int', 'Tot', 'Con']
169+
if node.feats['PronType'] == 'Ind':
170+
af['NumType'] = ['Card']
171+
# lexical check of PronTypes
172+
if node.lemma in ['suus', 'meus', 'noster', 'tuus', 'uester', 'vester', 'voster']:
173+
if not af['PronType'] == ['Prs']:
174+
af['PronType'].append('Prs')
175+
elif node.lemma in ['aliquot', 'quidam', 'quispiam', 'quivis', 'nullus', 'nonnullus', 'aliqui', 'qui', 'quilibet', 'quantuslibet', 'unus', 'uterque', 'ullus', 'multus', 'quisque', 'paucus', 'complures', 'quamplures', 'quicumque', 'reliquus', 'plerusque', 'aliqualis', 'quisquam', 'qualiscumque']:
176+
af['PronType'].append('Ind')
177+
elif node.lemma in ['omnis', 'totus', 'ambo', 'cunctus', 'unusquisque', 'uniuersus']:
178+
af['PronType'].append('Tot')
179+
if node.lemma in ['quantus', 'qualis', 'quicumque', 'quot', 'quotus']:
180+
af['PronType'].append('Rel')
181+
elif node.lemma in ['qui', 'quantus', 'quot']:
182+
af['PronType'].append('Int')
183+
elif node.lemma in ['hic', 'ipse', 'ille', 'tantus', 'talis', 'is', 'iste', 'eiusmodi', 'huiusmodi', 'idem', 'totidem', 'tot']:
184+
af['PronType'].append('Dem')
185+
elif node.lemma in ['alius', 'alter', 'solus', 'ceterus', 'alteruter', 'neuter', 'uter']:
186+
af['PronType'].append('Con')
157187
if self.flavio:
158188
# Flavio added InflClass but not everywhere, so it is not required.
159189
af['InflClass'] = ['IndEurA', 'IndEurI', 'IndEurO', 'IndEurX', 'LatPron']
@@ -170,8 +200,8 @@ def process_node(self, node):
170200
rf = ['NumType', 'NumForm']
171201
af = {
172202
'NumType': ['Card'],
173-
'NumForm': ['Word', 'Roman', 'Digit']
174-
}
203+
'NumForm': ['Word', 'Roman', 'Digit'],
204+
'Proper': ['Yes']}
175205
# Arabic digits and Roman numerals do not have inflection features.
176206
if not re.match(r'^(Digit|Roman)$', node.feats['NumForm']):
177207
af['Gender'] = ['Masc', 'Fem', 'Neut']
@@ -186,40 +216,40 @@ def process_node(self, node):
186216
elif re.match(r'^(VERB|AUX)$', node.upos):
187217
rf = ['VerbForm', 'Aspect']
188218
af = {
189-
'VerbForm': ['Inf', 'Fin', 'Part'],
219+
'VerbForm': ['Inf', 'Fin', 'Part', 'Conv'],
190220
'Aspect': ['Imp', 'Inch', 'Perf', 'Prosp'],
191-
'Polarity': ['Neg']
221+
'Polarity': ['Neg'],
222+
'Typo': ['Yes']
192223
}
193-
if not re.match(r'^(Ger|Gdv)$', node.feats['VerbForm']):
224+
if node.feats['VerbForm'] not in ['Part', 'Conv']:
194225
rf.append('Tense')
195-
af['Tense'] = ['Pres', 'Fut']
196-
if node.upos == 'VERB': # and not node.lemma.endswith('sum'): # compounds of sum
226+
af['Tense'] = ['Past', 'Pqp', 'Pres', 'Fut']
227+
if node.upos == 'VERB':
197228
rf.append('Voice')
198229
af['Voice'] = ['Act', 'Pass']
199-
# Main verbs have aspect but auxiliaries don't.
200-
# TODO: apparently, apparently AUXs have aspect as well
201-
# if node.upos == 'VERB':
202-
# rf.append('Aspect')
203-
# af['Aspect'] = ['Imp', 'Inch', 'Perf', 'Prosp']
204230
if node.feats['VerbForm'] == 'Fin': # imperative, indicative or subjunctive
205231
rf.extend(['Mood', 'Person', 'Number'])
206-
af['Tense'].extend(['Past', 'Pqp'])
207232
af['Mood'] = ['Ind', 'Sub', 'Imp']
208233
af['Person'] = ['1', '2', '3']
209234
af['Number'] = ['Sing', 'Plur']
210235
elif node.feats['VerbForm'] == 'Part':
211236
rf.extend(['Gender', 'Number', 'Case'])
212-
af['Number'] = ['Sing', 'Plur']
213-
af['Gender'] = ['Masc', 'Fem', 'Neut']
237+
af['Number'] = ['Sing', 'Plur'] if node.misc['TraditionalMood'] != 'Gerundium' else ['Sing']
238+
af['Gender'] = ['Masc', 'Fem', 'Neut'] if node.misc['TraditionalMood'] != 'Gerundium' else ['Neut']
214239
af['Case'] = ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
215240
af['Degree'] = ['Abs', 'Cmp']
216-
af['Gender'] = ['Masc', 'Fem', 'Neut']
217-
af['Tense'].append('Past')
218-
# else: nothing to be added for VerbForm=Inf
241+
if node.misc['TraditionalMood'].startswith('Gerundi'):
242+
af['Voice'] = ['Pass']
243+
af['Aspect'] = 'Prosp'
244+
elif node.feats['VerbForm'] == 'Conv':
245+
rf.extend(['Case', 'Gender', 'Number'])
246+
af['Case'] = ['Abl', 'Acc']
247+
af['Gender'] = ['Masc']
248+
af['Number'] = ['Sing']
249+
af['Voice'] = ['Act']
250+
elif node.feats['VerbForm'] == 'Inf':
251+
af['Tense'].remove('Pqp')
219252
if self.flavio:
220-
# Flavio has killed Tense in his treebanks.
221-
rf = [f for f in rf if f != 'Tense']
222-
af['VerbForm'].append('Vnoun')
223253
# Flavio added InflClass but not everywhere, so it is not required.
224254
af['InflClass'] = ['LatA', 'LatAnom', 'LatE', 'LatI', 'LatI2', 'LatX']
225255
if 'Degree' in af:
@@ -228,23 +258,22 @@ def process_node(self, node):
228258
af['Degree'] = ['Dim']
229259
af['Compound'] = ['Yes']
230260
af['Proper'] = ['Yes']
231-
if re.match(r'^(Part|Vnoun)$', node.feats['VerbForm']):
232-
af['InflClass[nominal]'] = ['IndEurA', 'IndEurI', 'IndEurO']
233-
af['VerbForm'].append('Vnoun')
261+
if re.match(r'^(Part|Conv)$', node.feats['VerbForm']):
262+
af['InflClass[nominal]'] = ['IndEurA', 'IndEurI', 'IndEurO', 'IndEurU']
234263
self.check_required_features(node, rf)
235264
self.check_allowed_features(node, af)
236265
# ADVERBS ##############################################################
237266
elif node.upos == 'ADV':
238267
af = {
239-
'AdvType': ['Loc', 'Tim'],
268+
'AdvType': ['Loc', 'Tim'],
240269
'PronType': ['Dem', 'Int', 'Rel', 'Ind', 'Neg', 'Tot', 'Con'],
241-
'Degree': ['Pos', 'Cmp', 'Sup', 'Abs'],
270+
'Degree': ['Pos', 'Cmp', 'Sup', 'Abs'],
271+
'NumType': ['Card', 'Ord'], # e.g., primum
242272
'Polarity': ['Neg']
243273
}
244274
if self.flavio:
245275
af['Compound'] = ['Yes']
246276
af['Form'] = ['Emp']
247-
af['NumType'] = ['Card', 'Ord'] # e.g., primum
248277
af['VerbForm'] = ['Part']
249278
af['Degree'].append('Dim')
250279
self.check_allowed_features(node, af)
@@ -262,7 +291,8 @@ def process_node(self, node):
262291
elif re.match(r'^[CS]CONJ$', node.upos):
263292
af = {
264293
'PronType': ['Rel', 'Con'],
265-
'Polarity': ['Neg']}
294+
'Polarity': ['Neg'],
295+
'Compound': ['Yes']}
266296
if self.flavio:
267297
af['Compound'] = ['Yes']
268298
af['Form'] = ['Emp']
@@ -271,10 +301,14 @@ def process_node(self, node):
271301
self.check_allowed_features(node, af)
272302
# ADPOSITIONS ##########################################################
273303
elif node.upos == 'ADP':
304+
rf = ['AdpType']
305+
af = {
306+
'AdpType': ['Prep', 'Post'],
307+
'Abbr': ['Yes']
308+
}
274309
if self.flavio:
275-
af = {
276-
'VerbForm': ['Part'],
277-
'Proper': ['Yes']}
310+
af['VerbForm'] = ['Part'],
311+
af['Proper'] = ['Yes']
278312
self.check_allowed_features(node, af)
279313
# THE REST: NO FEATURES ################################################
280314
else:

0 commit comments

Comments
 (0)