Skip to content

Commit 52955e9

Browse files
committed
OPENNLP-1745: SentenceDetector - Add Junit test for useTokenEnd = false
1 parent fe59eb9 commit 52955e9

File tree

1 file changed

+53
-20
lines changed

1 file changed

+53
-20
lines changed

opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,15 @@
2020
import java.io.IOException;
2121
import java.util.Locale;
2222

23-
import org.junit.jupiter.api.Assertions;
24-
import org.junit.jupiter.api.BeforeAll;
2523
import org.junit.jupiter.api.Test;
2624

2725
import opennlp.tools.dictionary.Dictionary;
2826

27+
import static org.junit.jupiter.api.Assertions.assertAll;
28+
import static org.junit.jupiter.api.Assertions.assertEquals;
29+
import static org.junit.jupiter.api.Assertions.assertNotNull;
30+
import static org.junit.jupiter.api.Assertions.fail;
31+
2932
/**
3033
* Tests for the {@link SentenceDetectorME} class.
3134
* <p>
@@ -45,19 +48,25 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest {
4548

4649
private static SentenceModel sentdetectModel;
4750

48-
@BeforeAll
49-
public static void prepareResources() throws IOException {
50-
Dictionary abbreviationDict = loadAbbDictionary(Locale.GERMAN);
51-
SentenceDetectorFactory factory = new SentenceDetectorFactory(
52-
"deu", true, abbreviationDict, EOS_CHARS);
53-
sentdetectModel = train(factory, Locale.GERMAN);
54-
Assertions.assertNotNull(sentdetectModel);
55-
Assertions.assertEquals("deu", sentdetectModel.getLanguage());
51+
private void prepareResources(boolean useTokenEnd) {
52+
try {
53+
Dictionary abbreviationDict = loadAbbDictionary(Locale.GERMAN);
54+
SentenceDetectorFactory factory = new SentenceDetectorFactory(
55+
"deu", useTokenEnd, abbreviationDict, EOS_CHARS);
56+
57+
sentdetectModel = train(factory, Locale.GERMAN);
58+
assertNotNull(sentdetectModel);
59+
assertEquals("deu", sentdetectModel.getLanguage());
60+
} catch (IOException ex) {
61+
fail("Couldn't train the SentenceModel using test data. Exception: " + ex.getMessage());
62+
}
5663
}
5764

5865
// Example taken from 'Sentences_DE.txt'
5966
@Test
6067
void testSentDetectWithInlineAbbreviationsEx1() {
68+
prepareResources(true);
69+
6170
final String sent1 = "Ein Traum, zu dessen Bildung eine besonders starke Verdichtung beigetragen, " +
6271
"wird für diese Untersuchung das günstigste Material sein.";
6372
// Here we have two abbreviations "S. = Seite" and "ff. = folgende (Plural)"
@@ -66,40 +75,64 @@ void testSentDetectWithInlineAbbreviationsEx1() {
6675
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);
6776
String sampleSentences = sent1 + " " + sent2;
6877
String[] sents = sentDetect.sentDetect(sampleSentences);
69-
Assertions.assertEquals(2, sents.length);
70-
Assertions.assertEquals(sent1, sents[0]);
71-
Assertions.assertEquals(sent2, sents[1]);
78+
assertEquals(2, sents.length);
79+
assertEquals(sent1, sents[0]);
80+
assertEquals(sent2, sents[1]);
7281
double[] probs = sentDetect.getSentenceProbabilities();
73-
Assertions.assertEquals(2, probs.length);
82+
assertEquals(2, probs.length);
7483
}
7584

7685
// Reduced example taken from 'Sentences_DE.txt'
7786
@Test
7887
void testSentDetectWithInlineAbbreviationsEx2() {
88+
prepareResources(true);
89+
7990
// Here we have three abbreviations: "S. = Seite", "vgl. = vergleiche", and "f. = folgende (Singular)"
8091
final String sent1 = "Die farbige Tafel, die ich aufschlage, " +
8192
"geht (vgl. die Analyse S. 185 f.) auf ein neues Thema ein.";
8293

8394
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);
8495
String[] sents = sentDetect.sentDetect(sent1);
85-
Assertions.assertEquals(1, sents.length);
86-
Assertions.assertEquals(sent1, sents[0]);
96+
assertEquals(1, sents.length);
97+
assertEquals(sent1, sents[0]);
8798
double[] probs = sentDetect.getSentenceProbabilities();
88-
Assertions.assertEquals(1, probs.length);
99+
assertEquals(1, probs.length);
89100
}
90101

91102
// Modified example deduced from 'Sentences_DE.txt'
92103
@Test
93104
void testSentDetectWithInlineAbbreviationsEx3() {
105+
prepareResources(true);
106+
94107
// Here we have two abbreviations "z. B. = zum Beispiel" and "S. = Seite"
95108
final String sent1 = "Die farbige Tafel, die ich aufschlage, " +
96109
"geht (z. B. die Analyse S. 185) auf ein neues Thema ein.";
97110

98111
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);
99112
String[] sents = sentDetect.sentDetect(sent1);
100-
Assertions.assertEquals(1, sents.length);
101-
Assertions.assertEquals(sent1, sents[0]);
113+
assertEquals(1, sents.length);
114+
assertEquals(sent1, sents[0]);
115+
double[] probs = sentDetect.getSentenceProbabilities();
116+
assertEquals(1, probs.length);
117+
}
118+
119+
@Test
120+
void testSentDetectWithUseTokenEndFalse() {
121+
prepareResources(false);
122+
123+
final String sent1 = "Träume sind eine Verbindung von Gedanken.";
124+
final String sent2 = "Verschiedene Gedanken sind während der Traumformation aktiv.";
125+
126+
SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel);
127+
// There is no blank space before the start of the second sentence.
128+
String[] sents = sentDetect.sentDetect(sent1 + sent2);
102129
double[] probs = sentDetect.getSentenceProbabilities();
103-
Assertions.assertEquals(1, probs.length);
130+
assertAll(
131+
() -> assertEquals(2, sents.length),
132+
() -> assertEquals(sent1, sents[0]),
133+
() -> assertEquals(sent1, sents[0]),
134+
() -> assertEquals(sent2, sents[1]),
135+
() -> assertEquals(2, probs.length)
136+
);
104137
}
105138
}

0 commit comments

Comments
 (0)