@@ -236,16 +236,17 @@ public class CoNLLUSentence {
236236 * Process line for current sentence. Return true if the line matches none of the recognized line types (e.g. the blank line that ends a sentence), false otherwise
237237 **/
238238 public boolean processLine (String line ) {
239- if (COMMENT_LINE .matcher (line ).matches ())
239+ if (COMMENT_LINE .matcher (line ).matches ()) {
240240 addSentenceData (line );
241- else if (MWT_LINE .matcher (line ).matches ())
241+ } else if (MWT_LINE .matcher (line ).matches ()) {
242242 addMWTData (line );
243- else if (TOKEN_LINE .matcher (line ).matches ())
243+ } else if (TOKEN_LINE .matcher (line ).matches ()) {
244244 tokenLines .add (line );
245- else if (EMPTY_LINE .matcher (line ).matches ())
245+ } else if (EMPTY_LINE .matcher (line ).matches ()) {
246246 emptyLines .add (line );
247- else
247+ } else {
248248 return true ;
249+ }
249250 return false ;
250251 }
251252
@@ -301,8 +302,14 @@ public List<CoNLLUDocument> readCoNLLUFileCreateCoNLLUDocuments(String filePath)
301302 for (String line : lines ) {
302303 // if start of a new doc, reset for a new doc
303304 if (DOCUMENT_LINE .matcher (line ).matches ()) {
305+ // since the next sentence gets added to the previous doc
306+ // (see below), we'll need to remove that
307+ if (docs .size () > 0 ) {
308+ docs .get (docs .size () - 1 ).sentences .remove (docs .get (docs .size () - 1 ).sentences .size () - 1 );
309+ }
310+ // the new document comes prebuilt with a blank sentence, so,
311+ // no need to add one here
304312 docs .add (new CoNLLUDocument ());
305- docs .get (docs .size () - 1 ).sentences .add (new CoNLLUSentence ());
306313 }
307314 // read in current line
308315 boolean endSentence = docs .get (docs .size () - 1 ).lastSentence ().processLine (line );
0 commit comments