Skip to content

Commit 327bb6f

Browse files
author
Federica Gamba
committed
adjustments in Latin feature rules
1 parent 043f4d7 commit 327bb6f

File tree

1 file changed

+47
-27
lines changed

1 file changed

+47
-27
lines changed

udapi/block/ud/la/markfeatsbugs.py

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ def process_node(self, node):
2828
rf = []
2929
af = {}
3030
# PROIEL-specific: Greek words without features
31-
if node.lemma == 'greek.expression':
31+
# LLCT-specific: corrupted nodes
32+
if node.lemma in ['greek.expression', 'missing^token']:
3233
pass
3334
# NOUNS ################################################################
3435
elif node.upos == 'NOUN':
@@ -41,12 +42,14 @@ def process_node(self, node):
4142
'Degree': ['Dim'],
4243
'Abbr': ['Yes'],
4344
'Foreign': ['Yes'],
44-
'VerbForm': ['Part']}
45+
'VerbForm': ['Part', 'Vnoun']}
4546
if self.flavio:
4647
# Flavio added InflClass but not everywhere, so it is not required.
47-
af['InflClass'] = ['IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
48+
af['InflClass'] = ['Ind', 'IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
4849
af['Proper'] = ['Yes']
50+
af['Polarity'] = ['Neg']
4951
af['Compound'] = ['Yes']
52+
af['Variant'] = ['Greek']
5053
af['NameType'] = ['Ast', 'Cal', 'Com', 'Geo', 'Giv', 'Let', 'Lit', 'Met', 'Nat', 'Rel', 'Sur', 'Oth']
5154
self.check_required_features(node, rf)
5255
self.check_allowed_features(node, af)
@@ -61,18 +64,18 @@ def process_node(self, node):
6164
'Abbr': ['Yes'],
6265
'Foreign': ['Yes']}
6366
if self.flavio:
64-
af['Compound'] = 'Yes'
67+
af['Compound'] = ['Yes']
68+
af['Variant'] = ['Greek']
6569
af['NameType'] = ['Ast', 'Cal', 'Com', 'Geo', 'Giv', 'Let', 'Lit', 'Met', 'Nat', 'Rel', 'Sur', 'Oth']
66-
if not node.feats['Abbr'] == 'Yes' and node.feats['Case']:
67-
af['InflClass'] = ['IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
70+
af['InflClass'] = ['Ind', 'IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
6871
self.check_required_features(node, rf)
6972
self.check_allowed_features(node, af)
7073
# ADJECTIVES ###########################################################
7174
elif node.upos == 'ADJ':
7275
if not node.feats['Abbr'] == 'Yes' and node.feats['Case']:
7376
rf = ['Gender', 'Number', 'Case']
7477
af = {
75-
'NumType': ['Ord', 'Dist'],
78+
'NumType': ['Dist', 'Mult', 'Ord'],
7679
'Gender': ['Masc', 'Fem', 'Neut'],
7780
'Number': ['Sing', 'Plur'],
7881
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl'],
@@ -83,9 +86,10 @@ def process_node(self, node):
8386
'VerbForm': ['Part']}
8487
if self.flavio:
8588
# Flavio added InflClass but not everywhere, so it is not required.
86-
af['InflClass'] = ['IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
89+
af['InflClass'] = ['Ind', 'IndEurA', 'IndEurE', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
8790
af['Compound'] = ['Yes']
8891
af['Proper'] = ['Yes']
92+
af['Variant'] = ['Greek']
8993
af['Degree'].append('Dim')
9094
af['NameType'] = ['Ast', 'Cal', 'Com', 'Geo', 'Giv', 'Let', 'Lit', 'Met', 'Nat', 'Rel', 'Sur', 'Oth']
9195
self.check_required_features(node, rf)
@@ -112,10 +116,10 @@ def process_node(self, node):
112116
rf.extend(['Person', 'Number'])
113117
af['Person'] = ['1', '2', '3']
114118
af['Number'] = ['Sing', 'Plur']
115-
# 1st and 2nd person do not have gender
119+
# 3rd person must have gender
116120
if node.feats['Person'] == '3': # is, id
117121
rf.append('Gender')
118-
af['Gender'] = ['Masc', 'Fem', 'Neut']
122+
af['Gender'] = ['Masc', 'Fem', 'Neut']
119123
elif re.match(r'^(Rel|Int)$', node.feats['PronType']):
120124
rf.extend(['Gender', 'Number'])
121125
af['Gender'] = ['Masc', 'Fem', 'Neut']
@@ -126,20 +130,20 @@ def process_node(self, node):
126130
af['Number'] = ['Sing', 'Plur']
127131
# lexical check of PronTypes
128132
af['PronType'] = []
129-
if node.lemma in ['is', 'ego', 'tu', 'sui', 'seipsum', 'nos', 'uos', 'vos', 'tumetipse', 'nosmetipse']:
133+
if node.lemma in ['ego', 'tu', 'is', 'sui', 'seipsum', 'nos', 'uos', 'vos', 'egoipse', 'egometipse', 'tumetipse', 'semetipse', 'nosmetipse']:
130134
af['PronType'].append('Prs')
131-
elif node.lemma in ['quis', 'aliquis', 'nihil', 'nemo', 'quivis', 'qui']:
135+
elif node.lemma in ['aliquis', 'nemo', 'nihil', 'nihilum', 'qui', 'quis', 'quisquis', 'quiuis', 'quivis']:
132136
af['PronType'].append('Ind')
133137
elif node.lemma in ['inuicem', 'invicem']:
134138
af['PronType'].append('Rcp')
135139
rf.remove('Case')
136-
if node.lemma in ['quicumque', 'qui', 'quisquis']:
140+
if node.lemma in ['qui', 'quicumque', 'quisquis']:
137141
af['PronType'].append('Rel')
138-
if node.lemma in ['qui', 'quis', 'quisnam', 'ecquis', 'ecqui']:
142+
if node.lemma in [ 'ecquis', 'ecqui', 'numquis', 'qui', 'quis', 'quisnam']:
139143
af['PronType'].append('Int')
140144
if self.flavio:
141145
# Flavio added InflClass but not everywhere, so it is not required.
142-
af['InflClass'] = ['LatAnom', 'LatPron']
146+
af['InflClass'] = ['Ind', 'IndEurO', 'IndEurX', 'LatAnom', 'LatPron']
143147
af['Compound'] = ['Yes']
144148
af['Polarity'] = ['Neg']
145149
af['Form'] = ['Emp']
@@ -175,25 +179,26 @@ def process_node(self, node):
175179
if node.lemma in ['suus', 'meus', 'noster', 'tuus', 'uester', 'vester', 'voster']:
176180
if not af['PronType'] == ['Prs']:
177181
af['PronType'].append('Prs')
178-
elif node.lemma in ['aliquot', 'quidam', 'quispiam', 'quivis', 'nullus', 'nonnullus', 'aliqui', 'qui', 'quilibet', 'quantuslibet', 'unus', 'uterque', 'ullus', 'multus', 'quisque', 'paucus', 'complures', 'quamplures', 'quicumque', 'reliquus', 'plerusque', 'aliqualis', 'quisquam', 'qualiscumque']:
182+
elif node.lemma in ['aliquantus', 'aliqui', 'aliquot', 'quidam', 'nonnullus', 'nullus', 'quantuscumque', 'quantuslibet', 'qui', 'quilibet', 'quispiam', 'quiuis', 'quivis', 'quotlibet', 'ullus', 'unus', 'uterque','multus', 'quisque', 'paucus', 'complures', 'quamplures', 'quicumque', 'reliquus', 'plerusque', 'aliqualis', 'quisquam', 'qualiscumque']:
179183
af['PronType'].append('Ind')
180184
elif node.lemma in ['omnis', 'totus', 'ambo', 'cunctus', 'unusquisque', 'uniuersus']:
181185
af['PronType'].append('Tot')
182186
if node.lemma in ['quantus', 'qualis', 'quicumque', 'quot', 'quotus', 'quotquot']:
183187
af['PronType'].append('Rel')
184-
elif node.lemma in ['qui', 'quantus', 'quot']:
188+
if node.lemma in ['qui', 'quantus', 'quot']:
185189
af['PronType'].append('Int')
186-
elif node.lemma in ['hic', 'ipse', 'ille', 'tantus', 'talis', 'is', 'iste', 'eiusmodi', 'huiusmodi', 'idem', 'totidem', 'tot']:
190+
elif node.lemma in ['hic', 'ipse', 'ille', 'tantus', 'talis', 'is', 'iste', 'eiusmodi', 'huiusmodi', 'idem', 'totidem', 'tot', 'praedictus', 'praefatus', 'suprascriptus']:
187191
af['PronType'].append('Dem')
188-
elif node.lemma in ['alius', 'alter', 'solus', 'ceterus', 'alteruter', 'neuter', 'uter']:
192+
elif node.lemma in ['alius', 'alter', 'solus', 'ceterus', 'alteruter', 'neuter', 'uter', 'uterlibet', 'uterque']:
189193
af['PronType'].append('Con')
190194
if self.flavio:
191195
# Flavio added InflClass but not everywhere, so it is not required.
192-
af['InflClass'] = ['IndEurA', 'IndEurI', 'IndEurO', 'IndEurX', 'LatPron']
196+
af['InflClass'] = ['Ind', 'IndEurA', 'IndEurI', 'IndEurO', 'IndEurX', 'LatPron']
193197
af['Compound'] = ['Yes']
194198
af['Form'] = ['Emp']
195199
af['NumType'] = ['Card']
196200
af['Degree'].append('Dim')
201+
af['PronType'].append('Art')
197202
if re.match(r'^(unus|ambo)', node.lemma):
198203
af['NumValue'] = ['1', '2']
199204
self.check_required_features(node, rf)
@@ -202,7 +207,7 @@ def process_node(self, node):
202207
elif node.upos == 'NUM':
203208
rf = ['NumType', 'NumForm']
204209
af = {
205-
'NumType': ['Card'],
210+
'NumType': ['Card', 'Ord'],
206211
'NumForm': ['Word', 'Roman', 'Digit'],
207212
'Proper': ['Yes']}
208213
# Arabic digits and Roman numerals do not have inflection features.
@@ -212,7 +217,9 @@ def process_node(self, node):
212217
af['Case'] = ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Abl']
213218
if self.flavio:
214219
# Flavio added InflClass but not everywhere, so it is not required. # e.g. duodecim
215-
af['InflClass'] = ['IndEurA', 'IndEurI', 'IndEurO', 'LatPron']
220+
af['InflClass'] = ['Ind', 'IndEurA', 'IndEurI', 'IndEurO', 'LatPron']
221+
af['NumForm'].append('Reference')
222+
af['Compound'] = ['Yes']
216223
self.check_required_features(node, rf)
217224
self.check_allowed_features(node, af)
218225
# VERBS AND AUXILIARIES ################################################
@@ -227,7 +234,7 @@ def process_node(self, node):
227234
if node.feats['VerbForm'] not in ['Part', 'Conv']:
228235
rf.append('Tense')
229236
af['Tense'] = ['Past', 'Pqp', 'Pres', 'Fut']
230-
if node.upos == 'VERB':
237+
if node.upos == 'VERB' or (node.upos == 'AUX' and node.lemma != 'sum'):
231238
rf.append('Voice')
232239
af['Voice'] = ['Act', 'Pass']
233240
if node.feats['VerbForm'] == 'Fin': # imperative, indicative or subjunctive
@@ -255,14 +262,20 @@ def process_node(self, node):
255262
if self.flavio:
256263
# Flavio added InflClass but not everywhere, so it is not required.
257264
af['InflClass'] = ['LatA', 'LatAnom', 'LatE', 'LatI', 'LatI2', 'LatX']
265+
af['VerbType'] = ['Mod']
258266
if 'Degree' in af:
259267
af['Degree'].append('Dim')
260268
else:
261269
af['Degree'] = ['Dim']
262270
af['Compound'] = ['Yes']
263271
af['Proper'] = ['Yes']
264272
if re.match(r'^(Part|Conv)$', node.feats['VerbForm']):
265-
af['InflClass[nominal]'] = ['IndEurA', 'IndEurI', 'IndEurO', 'IndEurU']
273+
af['InflClass[nominal]'] = ['IndEurA', 'IndEurI', 'IndEurO', 'IndEurU', 'IndEurX']
274+
elif node.feats['VerbForm'] == 'Inf':
275+
af['Case'] = ['Nom', 'Acc', 'Abl']
276+
af['Gender'] = ['Neut']
277+
af['Number'] = ['Sing']
278+
af['InflClass[nominal]'] = ['Ind']
266279
self.check_required_features(node, rf)
267280
self.check_allowed_features(node, af)
268281
# ADVERBS ##############################################################
@@ -271,13 +284,13 @@ def process_node(self, node):
271284
'AdvType': ['Loc', 'Tim'],
272285
'PronType': ['Dem', 'Int', 'Rel', 'Ind', 'Neg', 'Tot', 'Con'],
273286
'Degree': ['Pos', 'Cmp', 'Sup', 'Abs'],
274-
'NumType': ['Card', 'Ord'], # e.g., primum
287+
'NumType': ['Card', 'Mult', 'Ord'], # e.g., primum
275288
'Polarity': ['Neg']
276289
}
277290
if self.flavio:
278291
af['Compound'] = ['Yes']
279292
af['Form'] = ['Emp']
280-
af['VerbForm'] = ['Part']
293+
af['VerbForm'] = ['Fin', 'Part']
281294
af['Degree'].append('Dim')
282295
self.check_allowed_features(node, af)
283296
# PARTICLES ############################################################
@@ -289,6 +302,7 @@ def process_node(self, node):
289302
if self.flavio:
290303
af['Form'] = ['Emp']
291304
af['PronType'] = ['Dem']
305+
af['Compound'] = ['Yes']
292306
self.check_allowed_features(node, af)
293307
# CONJUNCTIONS #########################################################
294308
elif re.match(r'^[CS]CONJ$', node.upos):
@@ -301,6 +315,8 @@ def process_node(self, node):
301315
af['Form'] = ['Emp']
302316
af['VerbForm'] = ['Fin']
303317
af['NumType'] = ['Card']
318+
af['ConjType'] = ['Expl']
319+
af['AdvType'] = ['Loc']
304320
self.check_allowed_features(node, af)
305321
# ADPOSITIONS ##########################################################
306322
elif node.upos == 'ADP':
@@ -310,9 +326,13 @@ def process_node(self, node):
310326
'Abbr': ['Yes']
311327
}
312328
if self.flavio:
313-
af['VerbForm'] = ['Part'],
329+
af['VerbForm'] = ['Part']
314330
af['Proper'] = ['Yes']
331+
af['Compound'] = ['Yes']
315332
self.check_allowed_features(node, af)
333+
# X ##########################################################
334+
elif node.upos == 'X':
335+
af = {'Abbr': ['Yes']}
316336
# THE REST: NO FEATURES ################################################
317337
else:
318338
self.check_allowed_features(node, {})

0 commit comments

Comments
 (0)