2020import java .io .IOException ;
2121import java .util .Locale ;
2222
23- import org .junit .jupiter .api .Assertions ;
24- import org .junit .jupiter .api .BeforeAll ;
2523import org .junit .jupiter .api .Test ;
2624
2725import opennlp .tools .dictionary .Dictionary ;
2826
27+ import static org .junit .jupiter .api .Assertions .assertAll ;
28+ import static org .junit .jupiter .api .Assertions .assertEquals ;
29+ import static org .junit .jupiter .api .Assertions .assertNotNull ;
30+ import static org .junit .jupiter .api .Assertions .fail ;
31+
2932/**
3033 * Tests for the {@link SentenceDetectorME} class.
3134 * <p>
@@ -45,19 +48,25 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest {
4548
// Sentence model used by the tests in this class; it is static and is reassigned by
// prepareResources(boolean) each time that method trains a new model.
4649 private static SentenceModel sentdetectModel ;
4750
48- @ BeforeAll
49- public static void prepareResources () throws IOException {
50- Dictionary abbreviationDict = loadAbbDictionary (Locale .GERMAN );
51- SentenceDetectorFactory factory = new SentenceDetectorFactory (
52- "deu" , true , abbreviationDict , EOS_CHARS );
53- sentdetectModel = train (factory , Locale .GERMAN );
54- Assertions .assertNotNull (sentdetectModel );
55- Assertions .assertEquals ("deu" , sentdetectModel .getLanguage ());
51+ private void prepareResources (boolean useTokenEnd ) {
52+ try {
53+ Dictionary abbreviationDict = loadAbbDictionary (Locale .GERMAN );
54+ SentenceDetectorFactory factory = new SentenceDetectorFactory (
55+ "deu" , useTokenEnd , abbreviationDict , EOS_CHARS );
56+
57+ sentdetectModel = train (factory , Locale .GERMAN );
58+ assertNotNull (sentdetectModel );
59+ assertEquals ("deu" , sentdetectModel .getLanguage ());
60+ } catch (IOException ex ) {
61+ fail ("Couldn't train the SentenceModel using test data. Exception: " + ex .getMessage ());
62+ }
5663 }
5764
5865 // Example taken from 'Sentences_DE.txt'
5966 @ Test
6067 void testSentDetectWithInlineAbbreviationsEx1 () {
68+ prepareResources (true );
69+
6170 final String sent1 = "Ein Traum, zu dessen Bildung eine besonders starke Verdichtung beigetragen, " +
6271 "wird für diese Untersuchung das günstigste Material sein." ;
6372 // Here we have two abbreviations "S. = Seite" and "ff. = folgende (Plural)"
@@ -66,40 +75,64 @@ void testSentDetectWithInlineAbbreviationsEx1() {
6675 SentenceDetectorME sentDetect = new SentenceDetectorME (sentdetectModel );
6776 String sampleSentences = sent1 + " " + sent2 ;
6877 String [] sents = sentDetect .sentDetect (sampleSentences );
69- Assertions . assertEquals (2 , sents .length );
70- Assertions . assertEquals (sent1 , sents [0 ]);
71- Assertions . assertEquals (sent2 , sents [1 ]);
78+ assertEquals (2 , sents .length );
79+ assertEquals (sent1 , sents [0 ]);
80+ assertEquals (sent2 , sents [1 ]);
7281 double [] probs = sentDetect .getSentenceProbabilities ();
73- Assertions . assertEquals (2 , probs .length );
82+ assertEquals (2 , probs .length );
7483 }
7584
7685 // Reduced example taken from 'Sentences_DE.txt'
7786 @ Test
7887 void testSentDetectWithInlineAbbreviationsEx2 () {
88+ prepareResources (true );
89+
7990 // Here we have three abbreviations: "S. = Seite", "vgl. = vergleiche", and "f. = folgende (Singular)"
8091 final String sent1 = "Die farbige Tafel, die ich aufschlage, " +
8192 "geht (vgl. die Analyse S. 185 f.) auf ein neues Thema ein." ;
8293
8394 SentenceDetectorME sentDetect = new SentenceDetectorME (sentdetectModel );
8495 String [] sents = sentDetect .sentDetect (sent1 );
85- Assertions . assertEquals (1 , sents .length );
86- Assertions . assertEquals (sent1 , sents [0 ]);
96+ assertEquals (1 , sents .length );
97+ assertEquals (sent1 , sents [0 ]);
8798 double [] probs = sentDetect .getSentenceProbabilities ();
88- Assertions . assertEquals (1 , probs .length );
99+ assertEquals (1 , probs .length );
89100 }
90101
91102 // Modified example deduced from 'Sentences_DE.txt'
92103 @ Test
93104 void testSentDetectWithInlineAbbreviationsEx3 () {
105+ prepareResources (true );
106+
94107 // Here we have two abbreviations "z. B. = zum Beispiel" and "S. = Seite"
95108 final String sent1 = "Die farbige Tafel, die ich aufschlage, " +
96109 "geht (z. B. die Analyse S. 185) auf ein neues Thema ein." ;
97110
98111 SentenceDetectorME sentDetect = new SentenceDetectorME (sentdetectModel );
99112 String [] sents = sentDetect .sentDetect (sent1 );
100- Assertions .assertEquals (1 , sents .length );
101- Assertions .assertEquals (sent1 , sents [0 ]);
113+ assertEquals (1 , sents .length );
114+ assertEquals (sent1 , sents [0 ]);
115+ double [] probs = sentDetect .getSentenceProbabilities ();
116+ assertEquals (1 , probs .length );
117+ }
118+
119+ @ Test
120+ void testSentDetectWithUseTokenEndFalse () {
121+ prepareResources (false );
122+
123+ final String sent1 = "Träume sind eine Verbindung von Gedanken." ;
124+ final String sent2 = "Verschiedene Gedanken sind während der Traumformation aktiv." ;
125+
126+ SentenceDetectorME sentDetect = new SentenceDetectorME (sentdetectModel );
127+ //There is no blank space before start of the second sentence.
128+ String [] sents = sentDetect .sentDetect (sent1 + sent2 );
102129 double [] probs = sentDetect .getSentenceProbabilities ();
103- Assertions .assertEquals (1 , probs .length );
130+ assertAll (
131+ () -> assertEquals (2 , sents .length ),
132+ () -> assertEquals (sent1 , sents [0 ]),
133+ () -> assertEquals (sent1 , sents [0 ]),
134+ () -> assertEquals (sent2 , sents [1 ]),
135+ () -> assertEquals (2 , probs .length )
136+ );
104137 }
105138}
0 commit comments