|
20 | 20 | import java.io.IOException; |
21 | 21 | import java.util.Locale; |
22 | 22 |
|
23 | | -import org.junit.jupiter.api.Assertions; |
24 | 23 | import org.junit.jupiter.api.BeforeAll; |
25 | 24 | import org.junit.jupiter.api.Test; |
26 | 25 |
|
27 | 26 | import opennlp.tools.dictionary.Dictionary; |
28 | 27 |
|
| 28 | +import static org.junit.jupiter.api.Assertions.assertAll; |
| 29 | +import static org.junit.jupiter.api.Assertions.assertEquals; |
| 30 | +import static org.junit.jupiter.api.Assertions.assertNotNull; |
| 31 | +import static org.junit.jupiter.api.Assertions.fail; |
| 32 | + |
29 | 33 | /** |
30 | 34 | * Tests for the {@link SentenceDetectorME} class. |
31 | 35 | * <p> |
|
42 | 46 | public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest { |
43 | 47 |
|
44 | 48 | private static final char[] EOS_CHARS = {'.', '?', '!'}; |
45 | | - |
46 | | - private static SentenceModel sentdetectModel; |
| 49 | + private static Dictionary abbreviationDict; |
| 50 | + private SentenceModel sentdetectModel; |
47 | 51 |
|
48 | 52 | @BeforeAll |
49 | | - public static void prepareResources() throws IOException { |
50 | | - Dictionary abbreviationDict = loadAbbDictionary(Locale.GERMAN); |
51 | | - SentenceDetectorFactory factory = new SentenceDetectorFactory( |
52 | | - "deu", true, abbreviationDict, EOS_CHARS); |
53 | | - sentdetectModel = train(factory, Locale.GERMAN); |
54 | | - Assertions.assertNotNull(sentdetectModel); |
55 | | - Assertions.assertEquals("deu", sentdetectModel.getLanguage()); |
| 53 | + static void loadResources() throws IOException { |
| 54 | + abbreviationDict = loadAbbDictionary(Locale.GERMAN); |
| 55 | + } |
| 56 | + |
| 57 | + private void prepareResources(boolean useTokenEnd) { |
| 58 | + try { |
| 59 | + SentenceDetectorFactory factory = new SentenceDetectorFactory( |
| 60 | + "deu", useTokenEnd, abbreviationDict, EOS_CHARS); |
| 61 | + sentdetectModel = train(factory, Locale.GERMAN); |
| 62 | + |
| 63 | + assertAll(() -> assertNotNull(sentdetectModel), |
| 64 | + () -> assertEquals("deu", sentdetectModel.getLanguage())); |
| 65 | + } catch (IOException ex) { |
| 66 | + fail("Couldn't train the SentenceModel using test data. Exception: " + ex.getMessage()); |
| 67 | + } |
56 | 68 | } |
57 | 69 |
|
58 | 70 | // Example taken from 'Sentences_DE.txt' |
59 | 71 | @Test |
60 | 72 | void testSentDetectWithInlineAbbreviationsEx1() { |
| 73 | + prepareResources(true); |
| 74 | + |
61 | 75 | final String sent1 = "Ein Traum, zu dessen Bildung eine besonders starke Verdichtung beigetragen, " + |
62 | | - "wird für diese Untersuchung das günstigste Material sein."; |
| 76 | + "wird für diese Untersuchung das günstigste Material sein."; |
63 | 77 | // Here we have two abbreviations "S. = Seite" and "ff. = folgende (Plural)" |
64 | 78 | final String sent2 = "Ich wähle den auf S. 183 ff. mitgeteilten Traum von der botanischen Monographie."; |
65 | 79 |
|
66 | 80 | SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); |
67 | 81 | String sampleSentences = sent1 + " " + sent2; |
68 | 82 | String[] sents = sentDetect.sentDetect(sampleSentences); |
69 | | - Assertions.assertEquals(2, sents.length); |
70 | | - Assertions.assertEquals(sent1, sents[0]); |
71 | | - Assertions.assertEquals(sent2, sents[1]); |
72 | 83 | double[] probs = sentDetect.getSentenceProbabilities(); |
73 | | - Assertions.assertEquals(2, probs.length); |
| 84 | + |
| 85 | + assertAll(() -> assertEquals(2, sents.length), |
| 86 | + () -> assertEquals(sent1, sents[0]), |
| 87 | + () -> assertEquals(sent2, sents[1]), |
| 88 | + () -> assertEquals(2, probs.length)); |
74 | 89 | } |
75 | 90 |
|
76 | 91 | // Reduced example taken from 'Sentences_DE.txt' |
77 | 92 | @Test |
78 | 93 | void testSentDetectWithInlineAbbreviationsEx2() { |
| 94 | + prepareResources(true); |
| 95 | + |
79 | 96 | // Here we have three abbreviations: "S. = Seite", "vgl. = vergleiche", and "f. = folgende (Singular)" |
80 | 97 | final String sent1 = "Die farbige Tafel, die ich aufschlage, " + |
81 | | - "geht (vgl. die Analyse S. 185 f.) auf ein neues Thema ein."; |
| 98 | + "geht (vgl. die Analyse S. 185 f.) auf ein neues Thema ein."; |
82 | 99 |
|
83 | 100 | SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); |
84 | 101 | String[] sents = sentDetect.sentDetect(sent1); |
85 | | - Assertions.assertEquals(1, sents.length); |
86 | | - Assertions.assertEquals(sent1, sents[0]); |
87 | 102 | double[] probs = sentDetect.getSentenceProbabilities(); |
88 | | - Assertions.assertEquals(1, probs.length); |
| 103 | + |
| 104 | + assertAll(() -> assertEquals(1, sents.length), |
| 105 | + () -> assertEquals(sent1, sents[0]), |
| 106 | + () -> assertEquals(1, probs.length)); |
89 | 107 | } |
90 | 108 |
|
91 | 109 | // Modified example deduced from 'Sentences_DE.txt' |
92 | 110 | @Test |
93 | 111 | void testSentDetectWithInlineAbbreviationsEx3() { |
| 112 | + prepareResources(true); |
| 113 | + |
94 | 114 | // Here we have two abbreviations "z. B. = zum Beispiel" and "S. = Seite" |
95 | 115 | final String sent1 = "Die farbige Tafel, die ich aufschlage, " + |
96 | | - "geht (z. B. die Analyse S. 185) auf ein neues Thema ein."; |
| 116 | + "geht (z. B. die Analyse S. 185) auf ein neues Thema ein."; |
97 | 117 |
|
98 | 118 | SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); |
99 | 119 | String[] sents = sentDetect.sentDetect(sent1); |
100 | | - Assertions.assertEquals(1, sents.length); |
101 | | - Assertions.assertEquals(sent1, sents[0]); |
102 | 120 | double[] probs = sentDetect.getSentenceProbabilities(); |
103 | | - Assertions.assertEquals(1, probs.length); |
| 121 | + |
| 122 | + assertAll(() -> assertEquals(1, sents.length), |
| 123 | + () -> assertEquals(sent1, sents[0]), |
| 124 | + () -> assertEquals(1, probs.length)); |
| 125 | + } |
| 126 | + |
| 127 | + @Test |
| 128 | + void testSentDetectWithUseTokenEndFalse() { |
| 129 | + prepareResources(false); |
| 130 | + |
| 131 | + final String sent1 = "Träume sind eine Verbindung von Gedanken."; |
| 132 | + final String sent2 = "Verschiedene Gedanken sind während der Traumformation aktiv."; |
| 133 | + |
| 134 | + SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); |
| 135 | + //There is no blank space before start of the second sentence. |
| 136 | + String[] sents = sentDetect.sentDetect(sent1 + sent2); |
| 137 | + double[] probs = sentDetect.getSentenceProbabilities(); |
| 138 | + |
| 139 | + assertAll(() -> assertEquals(2, sents.length), |
| 140 | + () -> assertEquals(sent1, sents[0]), |
| 141 | + () -> assertEquals(sent2, sents[1]), |
| 142 | + () -> assertEquals(2, probs.length)); |
104 | 143 | } |
105 | 144 | } |
0 commit comments