@@ -45,10 +45,15 @@ public class CoNLLUReaderITest {
4545 {"de" , "allí" , "el" , "rebaja" , "." },
4646 };
4747
48- static final String [][] EXPECTED_CPOS = {
48+ static final String [][] EXPECTED_UPOS = { // expected UPOS tag per token, indexed [sentence][token]; compared against CoreAnnotations.CoarseTagAnnotation in the test
4949 {"CCONJ" , "DET" , "NOUN" , "ADP" , "NUM" , "ADV" , "ADJ" , "ADP" , "DET" , "DET" , "NOUN" , "ADV" , "AUX" , "VERB" , "PRON" , "ADP" , "DET" , "NOUN" , "ADP" , "NOUN" , "PUNCT" },
5050 {"ADP" , "ADV" , "DET" , "NOUN" , "PUNCT" },
5151 };
52+ static final String [][] EXPECTED_XPOS = { // expected XPOS tag per token, indexed [sentence][token]; compared against token.tag(); null where no XPOS tag is expected
53+ {"cc" , "da0fs0" , "ncfs000" , "sps00" , "dn0cp0" , "rg" , "aq0mpp" , "sps00" , "da0fs0" , "di0fs0" , "ncfs000" , "rg" , "vmii3s0" , "vmn0000" , null , "sps00" , "di0ms0" , "ncms000" , "sps00" , "ncfs000" , "fp" },
54+ {"sps00" , "rg" , "da0fp0" , "ncfp000" , "fp" },
55+ };
56+
5257
5358 static final String [][] EXPECTED_FEATS = {
5459 {
@@ -133,15 +138,17 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
133138 List <CoreLabel > tokens = sentence .get (CoreAnnotations .TokensAnnotation .class );
134139 assertEquals (EXPECTED_WORD_TEXT [i ].length , tokens .size ());
135140 assertEquals (EXPECTED_LEMMA_TEXT [i ].length , tokens .size ());
136- assertEquals (EXPECTED_CPOS [i ].length , tokens .size ());
141+ assertEquals (EXPECTED_UPOS [i ].length , tokens .size ());
142+ assertEquals (EXPECTED_XPOS [i ].length , tokens .size ());
137143 for (int j = 0 ; j < tokens .size (); ++j ) {
138144 CoreLabel token = tokens .get (j );
139145 assertEquals (EXPECTED_WORD_TEXT [i ][j ], token .value ());
140146 assertEquals (EXPECTED_WORD_TEXT [i ][j ], token .word ());
141147 assertEquals (EXPECTED_WORD_TEXT [i ][j ], token .get (CoreAnnotations .OriginalTextAnnotation .class ));
142148
143149 assertEquals (EXPECTED_LEMMA_TEXT [i ][j ], token .lemma ());
144- assertEquals (EXPECTED_CPOS [i ][j ], token .tag ());
150+ assertEquals (EXPECTED_UPOS [i ][j ], token .get (CoreAnnotations .CoarseTagAnnotation .class ));
151+ assertEquals (EXPECTED_XPOS [i ][j ], token .tag ());
145152
146153 assertEquals (Integer .valueOf (i ), token .get (CoreAnnotations .SentenceIndexAnnotation .class ));
147154 assertEquals (Integer .valueOf (j +1 ), token .get (CoreAnnotations .IndexAnnotation .class ));
@@ -239,6 +246,11 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
239246 assertEquals (expected , feats );
240247 }
241248
249+ // Some of the AnCora tokens don't have XPOS, so only count the PartOfSpeechAnnotation key when it is present
250+ if (token .containsKey (CoreAnnotations .PartOfSpeechAnnotation .class )) {
251+ expectedKeys += 1 ;
252+ }
253+
242254 // the MWT token specifically gets one more field, the MWT text
243255 if (i == 0 && (j == 13 || j == 14 )) {
244256 expectedKeys += 1 ;
@@ -252,6 +264,7 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
252264 // CoreAnnotations.IsNewlineAnnotation
253265 // CoreAnnotations.LemmaAnnotation
254266 // CoreAnnotations.PartOfSpeechAnnotation
267+ // CoreAnnotations.CoarseTagAnnotation
255268 // CoreAnnotations.IndexAnnotation
256269 // CoreAnnotations.AfterAnnotation
257270 // CoreAnnotations.BeforeAnnotation
0 commit comments