12
12
13
13
class MarkFeatsBugs (udapi .block .ud .markfeatsbugs .MarkFeatsBugs ):
14
14
15
def __init__(self, flavio=False, **kwargs):
    """Set up a ud.la.MarkFeatsBugs block.

    Args:
        flavio: Switch selecting which feature inventory is accepted.
            When true (e.g. ``flavio=1``), the features defined by Flavio
            for the treebanks he maintains are accepted. When false (the
            default), a more conservative set of features and values is
            expected.
        **kwargs: Any remaining keyword arguments; they are handed on
            unchanged to the parent block's constructor.
    """
    super().__init__(**kwargs)
    # Stored as given; process_node consults it to extend or relax the
    # required/allowed feature sets.
    self.flavio = flavio
26
+
15
27
def process_node (self , node ):
16
28
# NOUNS ################################################################
17
29
if node .upos == 'NOUN' :
18
- self . check_required_features ( node , ['Gender' , 'Number' , 'Case' ])
19
- self . check_allowed_features ( node , {
30
+ rf = ['Gender' , 'Number' , 'Case' ]
31
+ af = {
20
32
'Gender' : ['Masc' , 'Fem' , 'Neut' ],
21
33
'Number' : ['Sing' , 'Plur' ],
22
34
'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Abl' ],
23
- 'Foreign' : ['Yes' ]})
35
+ 'Foreign' : ['Yes' ]}
36
+ if self .flavio :
37
+ rf .append ('InflClass' )
38
+ af ['InflClass' ] = ['IndEurA' , 'IndEurO' , 'IndEurX' ]
39
+ self .check_required_features (node , rf )
40
+ self .check_allowed_features (node , af )
24
41
# PROPER NOUNS #########################################################
25
42
elif node .upos == 'PROPN' :
26
43
self .check_required_features (node , ['Gender' , 'Number' , 'Case' ])
@@ -32,13 +49,20 @@ def process_node(self, node):
32
49
'Foreign' : ['Yes' ]})
33
50
# ADJECTIVES ###########################################################
34
51
elif node .upos == 'ADJ' :
35
- self . check_required_features ( node , ['Gender' , 'Number' , 'Case' , 'Degree' ])
36
- self . check_allowed_features ( node , {
52
+ rf = ['Gender' , 'Number' , 'Case' , 'Degree' ]
53
+ af = {
37
54
'Gender' : ['Masc' , 'Fem' , 'Neut' ],
38
55
'Number' : ['Sing' , 'Plur' ],
39
56
'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Abl' ],
40
- 'Degree' : ['Pos' , 'Cmp' , 'Sup' ],
41
- 'Foreign' : ['Yes' ]})
57
+ 'Degree' : ['Pos' , 'Cmp' , 'Sup' , 'Abs' ],
58
+ 'Foreign' : ['Yes' ]}
59
+ if self .flavio :
60
+ # Flavio does not use Degree=Pos, hence Degree is not required.
61
+ rf = [f for f in rf if f != 'Degree' ]
62
+ rf .append ('InflClass' )
63
+ af ['InflClass' ] = ['IndEurA' , 'IndEurO' , 'IndEurX' ]
64
+ self .check_required_features (node , rf )
65
+ self .check_allowed_features (node , af )
42
66
# PRONOUNS #############################################################
43
67
elif node .upos == 'PRON' :
44
68
self .check_required_features (node , ['PronType' ])
@@ -81,13 +105,19 @@ def process_node(self, node):
81
105
'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Abl' ]
82
106
})
83
107
else :
84
- self . check_required_features ( node , ['PronType' , 'Gender' , 'Number' , 'Case' ])
85
- self . check_allowed_features ( node , {
108
+ rf = ['PronType' , 'Gender' , 'Number' , 'Case' ]
109
+ af = {
86
110
'PronType' : ['Dem' , 'Int' , 'Rel' , 'Ind' , 'Neg' , 'Tot' , 'Emp' ],
87
111
'Gender' : ['Masc' , 'Fem' , 'Neut' ],
88
112
'Number' : ['Sing' , 'Plur' ],
89
- 'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Abl' ]
90
- })
113
+ 'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Abl' ]}
114
+ if self .flavio :
115
+ rf .append ('InflClass' )
116
+ af ['PronType' ].append ('Con' )
117
+ af ['InflClass' ] = ['LatPron' ]
118
+ af ['Form' ] = ['Emp' ]
119
+ self .check_required_features (node , rf )
120
+ self .check_allowed_features (node , af )
91
121
# NUMERALS #############################################################
92
122
elif node .upos == 'NUM' :
93
123
self .check_required_features (node , ['NumType' , 'NumForm' ])
@@ -98,73 +128,52 @@ def process_node(self, node):
98
128
'NumForm' : ['Digit' , 'Roman' ]
99
129
})
100
130
else :
101
- self .check_required_features (node , ['NumType' , 'NumForm' , 'Number' , 'Case' ])
131
+ self .check_required_features (node , ['NumType' , 'NumForm' ])
102
132
self .check_allowed_features (node , {
103
133
'NumType' : ['Card' ],
104
- 'NumForm' : ['Word' ],
105
- 'Number' : ['Plur' ],
106
- 'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Abl' ]
134
+ 'NumForm' : ['Word' ]
107
135
})
108
136
# VERBS AND AUXILIARIES ################################################
109
137
elif re .match (r'^(VERB|AUX)$' , node .upos ):
110
- self .check_required_features (node , ['Aspect' , 'VerbForm' ])
111
- if node .feats ['VerbForm' ] == 'Inf' :
112
- self .check_allowed_features (node , {
113
- 'Aspect' : ['Imp' , 'Perf' , 'Prosp' ],
114
- 'VerbForm' : ['Inf' ],
115
- 'Polarity' : ['Pos' , 'Neg' ]
116
- })
117
- elif node .feats ['VerbForm' ] == 'Fin' :
118
- if node .feats ['Mood' ] == 'Imp' :
119
- self .check_required_features (node , ['Mood' , 'Person' , 'Number' ])
120
- self .check_allowed_features (node , {
121
- 'Aspect' : ['Imp' , 'Perf' , 'Prosp' ],
122
- 'VerbForm' : ['Fin' ],
123
- 'Mood' : ['Imp' ],
124
- 'Person' : ['1' , '2' , '3' ],
125
- 'Number' : ['Sing' , 'Plur' ],
126
- 'Polarity' : ['Pos' , 'Neg' ]
127
- })
128
- else : # indicative or subjunctive
129
- self .check_required_features (node , ['Mood' , 'Voice' , 'Tense' , 'Person' , 'Number' ])
130
- self .check_allowed_features (node , {
131
- 'Aspect' : ['Imp' , 'Perf' , 'Prosp' ],
132
- 'VerbForm' : ['Fin' ],
133
- 'Mood' : ['Ind' , 'Sub' ],
134
- 'Tense' : ['Past' , 'Imp' , 'Pres' , 'Fut' ], # only in indicative
135
- 'Voice' : ['Act' ],
136
- 'Person' : ['1' , '2' , '3' ],
137
- 'Number' : ['Sing' , 'Plur' ],
138
- 'Polarity' : ['Pos' , 'Neg' ]
139
- })
138
+ rf = ['Aspect' , 'VerbForm' ]
139
+ af = {
140
+ 'Aspect' : ['Imp' , 'Perf' , 'Prosp' ],
141
+ 'VerbForm' : ['Inf' , 'Fin' , 'Part' , 'Vnoun' ],
142
+ 'Polarity' : ['Pos' , 'Neg' ]}
143
+ if node .feats ['VerbForm' ] == 'Fin' :
144
+ rf .extend (['Mood' , 'Person' , 'Number' ])
145
+ af ['Mood' ] = ['Ind' , 'Sub' , 'Imp' ]
146
+ af ['Person' ] = ['1' , '2' , '3' ]
147
+ af ['Number' ] = ['Sing' , 'Plur' ]
148
+ if re .match (r'^(Ind|Sub)$' , node .feats ['Mood' ]): # indicative or subjunctive
149
+ rf .extend (['Voice' , 'Tense' ])
150
+ af ['Voice' ] = ['Act' , 'Pass' ]
151
+ af ['Tense' ] = ['Past' , 'Imp' , 'Pres' , 'Fut' ]
140
152
elif node .feats ['VerbForm' ] == 'Part' :
141
- self .check_required_features (node , ['Tense' , 'Gender' , 'Number' , 'Voice' ])
142
- self .check_allowed_features (node , {
143
- 'Aspect' : ['Imp' , 'Perf' , 'Prosp' ],
144
- 'VerbForm' : ['Part' ],
145
- 'Tense' : ['Past' ],
146
- 'Voice' : ['Act' ], # passive participle is ADJ, so we will not encounter it under VERB
147
- 'Number' : ['Sing' , 'Plur' ],
148
- 'Gender' : ['Masc' , 'Fem' , 'Neut' ],
149
- 'Polarity' : ['Pos' , 'Neg' ]
150
- })
153
+ rf .extend (['Tense' , 'Gender' , 'Number' , 'Voice' ])
154
+ af ['Tense' ] = ['Past' ]
155
+ af ['Voice' ] = ['Act' ]
156
+ af ['Number' ] = ['Sing' , 'Plur' ]
157
+ af ['Gender' ] = ['Masc' , 'Fem' , 'Neut' ]
151
158
else : # verbal noun
152
- self .check_required_features (node , ['Tense' , 'Number' , 'Voice' ])
153
- self .check_allowed_features (node , {
154
- 'Aspect' : ['Imp' , 'Perf' , 'Prosp' ],
155
- 'VerbForm' : ['Vnoun' ],
156
- 'Tense' : ['Past' , 'Pres' ],
157
- 'Voice' : ['Act' ],
158
- 'Number' : ['Sing' , 'Plur' ],
159
- 'Gender' : ['Masc' , 'Fem' , 'Neut' ], # annotated only in singular
160
- 'Polarity' : ['Pos' , 'Neg' ]
161
- })
159
+ rf .extend (['Tense' , 'Voice' ])
160
+ af ['Tense' ] = ['Past' , 'Pres' ]
161
+ af ['Voice' ] = ['Act' ]
162
+ af ['Gender' ] = ['Masc' , 'Fem' , 'Neut' ]
163
+ if self .flavio :
164
+ # Flavio has killed Tense in his treebanks.
165
+ rf = [f for f in rf if f != 'Tense' ]
166
+ # Flavio added InflClass but not everywhere, so it is not required.
167
+ af ['InflClass' ] = ['LatA' , 'LatAnom' , 'LatE' , 'LatI2' , 'LatX' ]
168
+ self .check_required_features (node , rf )
169
+ self .check_allowed_features (node , af )
162
170
# ADVERBS ##############################################################
163
171
elif node .upos == 'ADV' :
164
172
if node .feats ['PronType' ] != '' :
165
173
# Pronominal adverbs are neither compared nor negated.
166
174
self .check_allowed_features (node , {
167
- 'PronType' : ['Dem' , 'Int' , 'Rel' , 'Ind' , 'Neg' , 'Tot' ]
175
+ 'PronType' : ['Dem' , 'Int' , 'Rel' , 'Ind' , 'Neg' , 'Tot' ],
176
+ 'AdvType' : ['Loc' ]
168
177
})
169
178
else :
170
179
# The remaining adverbs are neither pronominal, nor compared or
0 commit comments