Skip to content

Commit b48d5be

Browse files
authored
LUCENE-9707: Hunspell: check Lucene's implementation against Hunspell's test data (#2267)
1 parent 2da7a4a commit b48d5be

File tree

3 files changed

+112
-51
lines changed

3 files changed

+112
-51
lines changed

gradle/testing/randomization/policies/tests.policy

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ grant {
9090

9191
// allows LuceneTestCase#runWithRestrictedPermissions to execute with lower (or no) permission
9292
permission java.security.SecurityPermission "createAccessControlContext";
93+
94+
// Some Hunspell tests may read from external files specified in system properties
95+
permission java.io.FilePermission "${hunspell.repo.path}${/}-", "read";
9396
};
9497

9598
// Permissions to support ant build

lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/SpellCheckerTest.java

Lines changed: 39 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,99 +16,83 @@
1616
*/
1717
package org.apache.lucene.analysis.hunspell;
1818

19+
import java.io.IOException;
1920
import java.io.InputStream;
20-
import java.net.URL;
2121
import java.nio.file.Files;
2222
import java.nio.file.Path;
23+
import java.text.ParseException;
2324
import java.util.List;
24-
import java.util.Objects;
2525
import java.util.stream.Collectors;
2626
import org.apache.lucene.store.ByteBuffersDirectory;
2727
import org.apache.lucene.util.IOUtils;
28-
import org.junit.Test;
2928

3029
public class SpellCheckerTest extends StemmerTestBase {
31-
@Test
32-
public void base() throws Exception {
30+
31+
public void testBase() throws Exception {
3332
doTest("base");
3433
}
3534

36-
@Test
37-
public void baseUtf() throws Exception {
35+
public void testBaseUtf() throws Exception {
3836
doTest("base_utf");
3937
}
4038

41-
@Test
42-
public void keepcase() throws Exception {
39+
public void testKeepcase() throws Exception {
4340
doTest("keepcase");
4441
}
4542

46-
@Test
47-
public void allcaps() throws Exception {
43+
public void testAllcaps() throws Exception {
4844
doTest("allcaps");
4945
}
5046

5147
public void rep() throws Exception {
5248
doTest("rep");
5349
}
5450

55-
@Test
56-
public void forceUCase() throws Exception {
51+
public void testForceUCase() throws Exception {
5752
doTest("forceucase");
5853
}
5954

60-
@Test
61-
public void checkSharpS() throws Exception {
55+
public void testCheckSharpS() throws Exception {
6256
doTest("checksharps");
6357
}
6458

65-
@Test
66-
public void IJ() throws Exception {
59+
public void testIJ() throws Exception {
6760
doTest("IJ");
6861
}
6962

70-
@Test
71-
public void i53643_numbersWithSeparators() throws Exception {
63+
public void testI53643_numbersWithSeparators() throws Exception {
7264
doTest("i53643");
7365
}
7466

75-
@Test
76-
public void dotless_i() throws Exception {
67+
public void testDotless_i() throws Exception {
7768
doTest("dotless_i");
7869
}
7970

80-
@Test
81-
public void needAffixOnAffixes() throws Exception {
71+
public void testNeedAffixOnAffixes() throws Exception {
8272
doTest("needaffix5");
8373
}
8474

85-
@Test
86-
public void compoundFlag() throws Exception {
75+
public void testCompoundFlag() throws Exception {
8776
doTest("compoundflag");
8877
}
8978

90-
@Test
91-
public void checkCompoundCase() throws Exception {
79+
public void testCheckCompoundCase() throws Exception {
9280
doTest("checkcompoundcase");
9381
}
9482

95-
@Test
96-
public void checkCompoundDup() throws Exception {
83+
public void testCheckCompoundDup() throws Exception {
9784
doTest("checkcompounddup");
9885
}
9986

100-
@Test
101-
public void checkCompoundTriple() throws Exception {
87+
public void testCheckCompoundTriple() throws Exception {
10288
doTest("checkcompoundtriple");
10389
}
10490

105-
@Test
106-
public void simplifiedTriple() throws Exception {
91+
public void testSimplifiedTriple() throws Exception {
10792
doTest("simplifiedtriple");
10893
}
10994

110-
@Test
111-
public void compoundForbid() throws Exception {
95+
public void testCompoundForbid() throws Exception {
11296
doTest("compoundforbid");
11397
}
11498

@@ -161,10 +145,14 @@ public void testGermanCompounding() throws Exception {
161145
}
162146

163147
protected void doTest(String name) throws Exception {
164-
InputStream affixStream =
165-
Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);
166-
InputStream dictStream =
167-
Objects.requireNonNull(getClass().getResourceAsStream(name + ".dic"), name);
148+
checkSpellCheckerExpectations(
149+
Path.of(getClass().getResource(name + ".aff").toURI()).getParent().resolve(name), true);
150+
}
151+
152+
static void checkSpellCheckerExpectations(Path basePath, boolean checkSuggestions)
153+
throws IOException, ParseException {
154+
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
155+
InputStream dictStream = Files.newInputStream(Path.of(basePath.toString() + ".dic"));
168156

169157
SpellChecker speller;
170158
try {
@@ -176,30 +164,30 @@ protected void doTest(String name) throws Exception {
176164
IOUtils.closeWhileHandlingException(dictStream);
177165
}
178166

179-
URL good = StemmerTestBase.class.getResource(name + ".good");
180-
if (good != null) {
181-
for (String word : Files.readAllLines(Path.of(good.toURI()))) {
182-
assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word));
167+
Path good = Path.of(basePath + ".good");
168+
if (Files.exists(good)) {
169+
for (String word : Files.readAllLines(good)) {
170+
assertTrue("Unexpectedly considered misspelled: " + word, speller.spell(word.trim()));
183171
}
184172
}
185173

186-
URL wrong = StemmerTestBase.class.getResource(name + ".wrong");
187-
URL sug = StemmerTestBase.class.getResource(name + ".sug");
188-
if (wrong != null) {
189-
List<String> wrongWords = Files.readAllLines(Path.of(wrong.toURI()));
174+
Path wrong = Path.of(basePath + ".wrong");
175+
Path sug = Path.of(basePath + ".sug");
176+
if (Files.exists(wrong)) {
177+
List<String> wrongWords = Files.readAllLines(wrong);
190178
for (String word : wrongWords) {
191-
assertFalse("Unexpectedly considered correct: " + word, speller.spell(word));
179+
assertFalse("Unexpectedly considered correct: " + word, speller.spell(word.trim()));
192180
}
193-
if (sug != null) {
181+
if (Files.exists(sug) && checkSuggestions) {
194182
String suggestions =
195183
wrongWords.stream()
196184
.map(s -> String.join(", ", speller.suggest(s)))
197185
.filter(s -> !s.isEmpty())
198186
.collect(Collectors.joining("\n"));
199-
assertEquals(Files.readString(Path.of(sug.toURI())).trim(), suggestions);
187+
assertEquals(Files.readString(sug).trim(), suggestions);
200188
}
201189
} else {
202-
assertNull(".sug file without .wrong file!", sug);
190+
assertFalse(".sug file without .wrong file!", Files.exists(sug));
203191
}
204192
}
205193
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.analysis.hunspell;
18+
19+
import java.io.IOException;
20+
import java.nio.file.DirectoryStream;
21+
import java.nio.file.Files;
22+
import java.nio.file.Path;
23+
import java.text.ParseException;
24+
import java.util.Collection;
25+
import java.util.Set;
26+
import java.util.TreeSet;
27+
import java.util.stream.Collectors;
28+
import org.junit.AssumptionViolatedException;
29+
import org.junit.Test;
30+
import org.junit.runner.RunWith;
31+
import org.junit.runners.Parameterized;
32+
33+
/**
34+
* Same as {@link SpellCheckerTest}, but checks all Hunspell's test data. The path to the checked
35+
* out Hunspell repository should be in the {@code -Dhunspell.repo.path=...} system property.
36+
*/
37+
@RunWith(Parameterized.class)
38+
public class TestHunspellRepositoryTestCases {
39+
private final Path pathPrefix;
40+
41+
public TestHunspellRepositoryTestCases(String testName, Path pathPrefix) {
42+
this.pathPrefix = pathPrefix;
43+
}
44+
45+
@Parameterized.Parameters(name = "{0}")
46+
public static Collection<Object[]> data() throws IOException {
47+
String hunspellRepo = System.getProperty("hunspell.repo.path");
48+
if (hunspellRepo == null) {
49+
throw new AssumptionViolatedException("hunspell.repo.path property not specified.");
50+
}
51+
52+
Set<String> names = new TreeSet<>();
53+
Path tests = Path.of(hunspellRepo).resolve("tests");
54+
try (DirectoryStream<Path> files = Files.newDirectoryStream(tests)) {
55+
for (Path file : files) {
56+
String name = file.getFileName().toString();
57+
if (name.endsWith(".aff")) {
58+
names.add(name.substring(0, name.length() - 4));
59+
}
60+
}
61+
}
62+
63+
return names.stream().map(s -> new Object[] {s, tests.resolve(s)}).collect(Collectors.toList());
64+
}
65+
66+
@Test
67+
public void test() throws IOException, ParseException {
68+
SpellCheckerTest.checkSpellCheckerExpectations(pathPrefix, false);
69+
}
70+
}

0 commit comments

Comments
 (0)