@@ -25,6 +25,10 @@ public class CoNLLUReader {
2525 **/
2626 // TODO: we should handle field 8, DEPS, for enhanced dependencies
2727 // doing that requires processing the empty nodes somehow
28+ // TODO: read sent_id?
29+ // TODO: read comments in general
30+ // TODO: MWT should have after/before set to ""
31+ // TODO: reconsider the newline as the after on the last word
2832 public static final int CoNLLU_IndexField = 0 ;
2933 public static final int CoNLLU_WordField = 1 ;
3034 public static final int CoNLLU_LemmaField = 2 ;
@@ -275,7 +279,7 @@ public Annotation convertCoNLLUDocumentToAnnotation(CoNLLUDocument doc) {
275279 if (sentenceIdx > 0 ) {
276280 // for now we're treating a CoNLL-U document as sentences separated by newline
277281 // so every sentence after the first should have a newline as the previous character
278- sentence .get (CoreAnnotations .TokensAnnotation .class ).get (0 ).setBefore (" \n " );
282+ sentence .get (CoreAnnotations .TokensAnnotation .class ).get (0 ).setBefore (System . lineSeparator () );
279283 }
280284 for (CoreLabel token : sentence .get (CoreAnnotations .TokensAnnotation .class )) {
281285 token .set (CoreAnnotations .TokenBeginAnnotation .class , documentIdx );
@@ -382,7 +386,7 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
382386 coreLabels .add (cl );
383387 }
384388 // the last token should have a newline after
385- coreLabels .get (coreLabels .size () - 1 ).setAfter (" \n " );
389+ coreLabels .get (coreLabels .size () - 1 ).setAfter (System . lineSeparator () );
386390 // set before
387391 coreLabels .get (0 ).setBefore ("" );
388392 for (int i = 1 ; i < coreLabels .size () ; i ++) {
0 commit comments