@@ -28,7 +28,8 @@ def process_node(self, node):
28
28
rf = []
29
29
af = {}
30
30
# PROIEL-specific: greek words without features
31
- if node .lemma == 'greek.expression' :
31
+ # LLCT-specific: corrupted nodes
32
+ if node .lemma in ['greek.expression' , 'missing^token' ]:
32
33
pass
33
34
# NOUNS ################################################################
34
35
elif node .upos == 'NOUN' :
@@ -41,12 +42,14 @@ def process_node(self, node):
41
42
'Degree' : ['Dim' ],
42
43
'Abbr' : ['Yes' ],
43
44
'Foreign' : ['Yes' ],
44
- 'VerbForm' : ['Part' ]}
45
+ 'VerbForm' : ['Part' , 'Vnoun' ]}
45
46
if self .flavio :
46
47
# Flavio added InflClass but not everywhere, so it is not required.
47
- af ['InflClass' ] = ['IndEurA' , 'IndEurE' , 'IndEurI' , 'IndEurO' , 'IndEurU' , 'IndEurX' ]
48
+ af ['InflClass' ] = ['Ind' , ' IndEurA' , 'IndEurE' , 'IndEurI' , 'IndEurO' , 'IndEurU' , 'IndEurX' ]
48
49
af ['Proper' ] = ['Yes' ]
50
+ af ['Polarity' ] = ['Neg' ]
49
51
af ['Compound' ] = ['Yes' ]
52
+ af ['Variant' ] = ['Greek' ]
50
53
af ['NameType' ] = ['Ast' , 'Cal' , 'Com' , 'Geo' , 'Giv' , 'Let' , 'Lit' , 'Met' , 'Nat' , 'Rel' , 'Sur' , 'Oth' ]
51
54
self .check_required_features (node , rf )
52
55
self .check_allowed_features (node , af )
@@ -61,18 +64,18 @@ def process_node(self, node):
61
64
'Abbr' : ['Yes' ],
62
65
'Foreign' : ['Yes' ]}
63
66
if self .flavio :
64
- af ['Compound' ] = 'Yes'
67
+ af ['Compound' ] = ['Yes' ]
68
+ af ['Variant' ] = ['Greek' ]
65
69
af ['NameType' ] = ['Ast' , 'Cal' , 'Com' , 'Geo' , 'Giv' , 'Let' , 'Lit' , 'Met' , 'Nat' , 'Rel' , 'Sur' , 'Oth' ]
66
- if not node .feats ['Abbr' ] == 'Yes' and node .feats ['Case' ]:
67
- af ['InflClass' ] = ['IndEurA' , 'IndEurE' , 'IndEurI' , 'IndEurO' , 'IndEurU' , 'IndEurX' ]
70
+ af ['InflClass' ] = ['Ind' , 'IndEurA' , 'IndEurE' , 'IndEurI' , 'IndEurO' , 'IndEurU' , 'IndEurX' ]
68
71
self .check_required_features (node , rf )
69
72
self .check_allowed_features (node , af )
70
73
# ADJECTIVES ###########################################################
71
74
elif node .upos == 'ADJ' :
72
75
if not node .feats ['Abbr' ] == 'Yes' and node .feats ['Case' ]:
73
76
rf = ['Gender' , 'Number' , 'Case' ]
74
77
af = {
75
- 'NumType' : ['Ord ' , 'Dist ' ],
78
+ 'NumType' : ['Dist ' , 'Mult' , 'Ord ' ],
76
79
'Gender' : ['Masc' , 'Fem' , 'Neut' ],
77
80
'Number' : ['Sing' , 'Plur' ],
78
81
'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Abl' ],
@@ -83,9 +86,10 @@ def process_node(self, node):
83
86
'VerbForm' : ['Part' ]}
84
87
if self .flavio :
85
88
# Flavio added InflClass but not everywhere, so it is not required.
86
- af ['InflClass' ] = ['IndEurA' , 'IndEurE' , 'IndEurI' , 'IndEurO' , 'IndEurU' , 'IndEurX' ]
89
+ af ['InflClass' ] = ['Ind' , ' IndEurA' , 'IndEurE' , 'IndEurI' , 'IndEurO' , 'IndEurU' , 'IndEurX' ]
87
90
af ['Compound' ] = ['Yes' ]
88
91
af ['Proper' ] = ['Yes' ]
92
+ af ['Variant' ] = ['Greek' ]
89
93
af ['Degree' ].append ('Dim' )
90
94
af ['NameType' ] = ['Ast' , 'Cal' , 'Com' , 'Geo' , 'Giv' , 'Let' , 'Lit' , 'Met' , 'Nat' , 'Rel' , 'Sur' , 'Oth' ]
91
95
self .check_required_features (node , rf )
@@ -112,10 +116,10 @@ def process_node(self, node):
112
116
rf .extend (['Person' , 'Number' ])
113
117
af ['Person' ] = ['1' , '2' , '3' ]
114
118
af ['Number' ] = ['Sing' , 'Plur' ]
115
- # 1st and 2nd person do not have gender
119
+ # 3rd person must have gender
116
120
if node .feats ['Person' ] == '3' : # is, id
117
121
rf .append ('Gender' )
118
- af ['Gender' ] = ['Masc' , 'Fem' , 'Neut' ]
122
+ af ['Gender' ] = ['Masc' , 'Fem' , 'Neut' ]
119
123
elif re .match (r'^(Rel|Int)$' , node .feats ['PronType' ]):
120
124
rf .extend (['Gender' , 'Number' ])
121
125
af ['Gender' ] = ['Masc' , 'Fem' , 'Neut' ]
@@ -126,20 +130,20 @@ def process_node(self, node):
126
130
af ['Number' ] = ['Sing' , 'Plur' ]
127
131
# lexical check of PronTypes
128
132
af ['PronType' ] = []
129
- if node .lemma in ['is ' , 'ego ' , 'tu ' , 'sui' , 'seipsum' , 'nos' , 'uos' , 'vos' , 'tumetipse' , 'nosmetipse' ]:
133
+ if node .lemma in ['ego ' , 'tu ' , 'is ' , 'sui' , 'seipsum' , 'nos' , 'uos' , 'vos' , 'egoipse' , 'egometipse' , ' tumetipse' , 'semetipse ' , 'nosmetipse' ]:
130
134
af ['PronType' ].append ('Prs' )
131
- elif node .lemma in ['quis ' , 'aliquis ' , 'nihil' , 'nemo ' , 'quivis ' , 'qui ' ]:
135
+ elif node .lemma in ['aliquis ' , 'nemo ' , 'nihil' , 'nihilum ' , 'qui ' , 'quis' , 'quisquis' , 'quiuis' , 'quivis ' ]:
132
136
af ['PronType' ].append ('Ind' )
133
137
elif node .lemma in ['inuicem' , 'invicem' ]:
134
138
af ['PronType' ].append ('Rcp' )
135
139
rf .remove ('Case' )
136
- if node .lemma in ['quicumque ' , 'qui ' , 'quisquis' ]:
140
+ if node .lemma in ['qui ' , 'quicumque ' , 'quisquis' ]:
137
141
af ['PronType' ].append ('Rel' )
138
- if node .lemma in ['qui ' , 'quis ' , 'quisnam ' , 'ecquis ' , 'ecqui ' ]:
142
+ if node .lemma in [ 'ecquis ' , 'ecqui ' , 'numquis ' , 'qui ' , 'quis' , 'quisnam ' ]:
139
143
af ['PronType' ].append ('Int' )
140
144
if self .flavio :
141
145
# Flavio added InflClass but not everywhere, so it is not required.
142
- af ['InflClass' ] = ['LatAnom' , 'LatPron' ]
146
+ af ['InflClass' ] = ['Ind' , 'IndEurO' , 'IndEurX' , ' LatAnom' , 'LatPron' ]
143
147
af ['Compound' ] = ['Yes' ]
144
148
af ['Polarity' ] = ['Neg' ]
145
149
af ['Form' ] = ['Emp' ]
@@ -175,25 +179,26 @@ def process_node(self, node):
175
179
if node .lemma in ['suus' , 'meus' , 'noster' , 'tuus' , 'uester' , 'vester' , 'voster' ]:
176
180
if not af ['PronType' ] == ['Prs' ]:
177
181
af ['PronType' ].append ('Prs' )
178
- elif node .lemma in ['aliquot ' , 'quidam ' , 'quispiam ' , 'quivis ' , 'nullus' , 'nonnullus ' , 'aliqui ' , 'qui' , 'quilibet' , 'quantuslibet ' , 'unus ' , 'uterque ' , 'ullus' , 'multus' , 'quisque' , 'paucus' , 'complures' , 'quamplures' , 'quicumque' , 'reliquus' , 'plerusque' , 'aliqualis' , 'quisquam' , 'qualiscumque' ]:
182
+ elif node .lemma in ['aliquantus ' , 'aliqui ' , 'aliquot ' , 'quidam ' , 'nonnullus' , ' nullus' , 'quantuscumque ' , 'quantuslibet ' , 'qui' , 'quilibet' , 'quispiam ' , 'quiuis ' , 'quivis ' , 'quotlibet' , ' ullus' , 'unus' , 'uterque' , 'multus' , 'quisque' , 'paucus' , 'complures' , 'quamplures' , 'quicumque' , 'reliquus' , 'plerusque' , 'aliqualis' , 'quisquam' , 'qualiscumque' ]:
179
183
af ['PronType' ].append ('Ind' )
180
184
elif node .lemma in ['omnis' , 'totus' , 'ambo' , 'cunctus' , 'unusquisque' , 'uniuersus' ]:
181
185
af ['PronType' ].append ('Tot' )
182
186
if node .lemma in ['quantus' , 'qualis' , 'quicumque' , 'quot' , 'quotus' , 'quotquot' ]:
183
187
af ['PronType' ].append ('Rel' )
184
- elif node .lemma in ['qui' , 'quantus' , 'quot' ]:
188
+ if node .lemma in ['qui' , 'quantus' , 'quot' ]:
185
189
af ['PronType' ].append ('Int' )
186
- elif node .lemma in ['hic' , 'ipse' , 'ille' , 'tantus' , 'talis' , 'is' , 'iste' , 'eiusmodi' , 'huiusmodi' , 'idem' , 'totidem' , 'tot' ]:
190
+ elif node .lemma in ['hic' , 'ipse' , 'ille' , 'tantus' , 'talis' , 'is' , 'iste' , 'eiusmodi' , 'huiusmodi' , 'idem' , 'totidem' , 'tot' , 'praedictus' , 'praefatus' , 'suprascriptus' ]:
187
191
af ['PronType' ].append ('Dem' )
188
- elif node .lemma in ['alius' , 'alter' , 'solus' , 'ceterus' , 'alteruter' , 'neuter' , 'uter' ]:
192
+ elif node .lemma in ['alius' , 'alter' , 'solus' , 'ceterus' , 'alteruter' , 'neuter' , 'uter' , 'uterlibet' , 'uterque' ]:
189
193
af ['PronType' ].append ('Con' )
190
194
if self .flavio :
191
195
# Flavio added InflClass but not everywhere, so it is not required.
192
- af ['InflClass' ] = ['IndEurA' , 'IndEurI' , 'IndEurO' , 'IndEurX' , 'LatPron' ]
196
+ af ['InflClass' ] = ['Ind' , ' IndEurA' , 'IndEurI' , 'IndEurO' , 'IndEurX' , 'LatPron' ]
193
197
af ['Compound' ] = ['Yes' ]
194
198
af ['Form' ] = ['Emp' ]
195
199
af ['NumType' ] = ['Card' ]
196
200
af ['Degree' ].append ('Dim' )
201
+ af ['PronType' ].append ('Art' )
197
202
if re .match (r'^(unus|ambo)' , node .lemma ):
198
203
af ['NumValue' ] = ['1' , '2' ]
199
204
self .check_required_features (node , rf )
@@ -202,7 +207,7 @@ def process_node(self, node):
202
207
elif node .upos == 'NUM' :
203
208
rf = ['NumType' , 'NumForm' ]
204
209
af = {
205
- 'NumType' : ['Card' ],
210
+ 'NumType' : ['Card' , 'Ord' ],
206
211
'NumForm' : ['Word' , 'Roman' , 'Digit' ],
207
212
'Proper' : ['Yes' ]}
208
213
# Arabic digits and Roman numerals do not have inflection features.
@@ -212,7 +217,9 @@ def process_node(self, node):
212
217
af ['Case' ] = ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Abl' ]
213
218
if self .flavio :
214
219
# Flavio added InflClass but not everywhere, so it is not required. # e.g. duodecim
215
- af ['InflClass' ] = ['IndEurA' , 'IndEurI' , 'IndEurO' , 'LatPron' ]
220
+ af ['InflClass' ] = ['Ind' , 'IndEurA' , 'IndEurI' , 'IndEurO' , 'LatPron' ]
221
+ af ['NumForm' ].append ('Reference' )
222
+ af ['Compound' ] = ['Yes' ]
216
223
self .check_required_features (node , rf )
217
224
self .check_allowed_features (node , af )
218
225
# VERBS AND AUXILIARIES ################################################
@@ -227,7 +234,7 @@ def process_node(self, node):
227
234
if node .feats ['VerbForm' ] not in ['Part' , 'Conv' ]:
228
235
rf .append ('Tense' )
229
236
af ['Tense' ] = ['Past' , 'Pqp' , 'Pres' , 'Fut' ]
230
- if node .upos == 'VERB' :
237
+ if node .upos == 'VERB' or ( node . upos == 'AUX' and node . lemma != 'sum' ) :
231
238
rf .append ('Voice' )
232
239
af ['Voice' ] = ['Act' , 'Pass' ]
233
240
if node .feats ['VerbForm' ] == 'Fin' : # imperative, indicative or subjunctive
@@ -255,14 +262,20 @@ def process_node(self, node):
255
262
if self .flavio :
256
263
# Flavio added InflClass but not everywhere, so it is not required.
257
264
af ['InflClass' ] = ['LatA' , 'LatAnom' , 'LatE' , 'LatI' , 'LatI2' , 'LatX' ]
265
+ af ['VerbType' ] = ['Mod' ]
258
266
if 'Degree' in af :
259
267
af ['Degree' ].append ('Dim' )
260
268
else :
261
269
af ['Degree' ] = ['Dim' ]
262
270
af ['Compound' ] = ['Yes' ]
263
271
af ['Proper' ] = ['Yes' ]
264
272
if re .match (r'^(Part|Conv)$' , node .feats ['VerbForm' ]):
265
- af ['InflClass[nominal]' ] = ['IndEurA' , 'IndEurI' , 'IndEurO' , 'IndEurU' ]
273
+ af ['InflClass[nominal]' ] = ['IndEurA' , 'IndEurI' , 'IndEurO' , 'IndEurU' , 'IndEurX' ]
274
+ elif node .feats ['VerbForm' ] == 'Inf' :
275
+ af ['Case' ] = ['Nom' , 'Acc' , 'Abl' ]
276
+ af ['Gender' ] = ['Neut' ]
277
+ af ['Number' ] = ['Sing' ]
278
+ af ['InflClass[nominal]' ] = ['Ind' ]
266
279
self .check_required_features (node , rf )
267
280
self .check_allowed_features (node , af )
268
281
# ADVERBS ##############################################################
@@ -271,13 +284,13 @@ def process_node(self, node):
271
284
'AdvType' : ['Loc' , 'Tim' ],
272
285
'PronType' : ['Dem' , 'Int' , 'Rel' , 'Ind' , 'Neg' , 'Tot' , 'Con' ],
273
286
'Degree' : ['Pos' , 'Cmp' , 'Sup' , 'Abs' ],
274
- 'NumType' : ['Card' , 'Ord' ], # e.g., primum
287
+ 'NumType' : ['Card' , 'Mult' , ' Ord' ], # e.g., primum
275
288
'Polarity' : ['Neg' ]
276
289
}
277
290
if self .flavio :
278
291
af ['Compound' ] = ['Yes' ]
279
292
af ['Form' ] = ['Emp' ]
280
- af ['VerbForm' ] = ['Part' ]
293
+ af ['VerbForm' ] = ['Fin' , ' Part' ]
281
294
af ['Degree' ].append ('Dim' )
282
295
self .check_allowed_features (node , af )
283
296
# PARTICLES ############################################################
@@ -289,6 +302,7 @@ def process_node(self, node):
289
302
if self .flavio :
290
303
af ['Form' ] = ['Emp' ]
291
304
af ['PronType' ] = ['Dem' ]
305
+ af ['Compound' ] = ['Yes' ]
292
306
self .check_allowed_features (node , af )
293
307
# CONJUNCTIONS #########################################################
294
308
elif re .match (r'^[CS]CONJ$' , node .upos ):
@@ -301,6 +315,8 @@ def process_node(self, node):
301
315
af ['Form' ] = ['Emp' ]
302
316
af ['VerbForm' ] = ['Fin' ]
303
317
af ['NumType' ] = ['Card' ]
318
+ af ['ConjType' ] = ['Expl' ]
319
+ af ['AdvType' ] = ['Loc' ]
304
320
self .check_allowed_features (node , af )
305
321
# ADPOSITIONS ##########################################################
306
322
elif node .upos == 'ADP' :
@@ -310,9 +326,13 @@ def process_node(self, node):
310
326
'Abbr' : ['Yes' ]
311
327
}
312
328
if self .flavio :
313
- af ['VerbForm' ] = ['Part' ],
329
+ af ['VerbForm' ] = ['Part' ]
314
330
af ['Proper' ] = ['Yes' ]
331
+ af ['Compound' ] = ['Yes' ]
315
332
self .check_allowed_features (node , af )
333
+ # X ##########################################################
334
+ elif node .upos == 'X' :
335
+ af = {'Abbr' : ['Yes' ]}
316
336
# THE REST: NO FEATURES ################################################
317
337
else :
318
338
self .check_allowed_features (node , {})
0 commit comments