Skip to content

Commit 07c6bca

Browse files
committed
OPENNLP-1759: Optimize computation of hashCode in StringList (OpenNLP 2.x)
1 parent 26cac4f commit 07c6bca

File tree

2 files changed

+20
-31
lines changed

2 files changed

+20
-31
lines changed

opennlp-tools/src/main/java/opennlp/tools/util/StringList.java

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.Iterator;
2222
import java.util.NoSuchElementException;
2323
import java.util.Objects;
24+
import java.util.stream.Collectors;
2425

2526
import opennlp.tools.util.jvm.StringInterners;
2627

@@ -33,6 +34,9 @@ public class StringList implements Iterable<String> {
3334

3435
private final boolean caseSensitive;
3536

37+
// It is safe to use caching of the hashCode for this class
38+
private transient Integer hashCode = null; // initial value is uncomputed
39+
3640
/**
3741
* Initializes a {@link StringList} instance. By default, this instance is case-sensitive.
3842
* <p>
@@ -53,7 +57,7 @@ public StringList(String singleToken) {
5357
*
5458
* @param tokens The string parts of the new {@link StringList}.
5559
* Must not be an empty tokens array or {@code null}.
56-
*
60+
*
5761
* @throws IllegalArgumentException Thrown if parameters were invalid.
5862
*/
5963
public StringList(String... tokens) {
@@ -73,15 +77,13 @@ public StringList(String... tokens) {
7377
* @throws IllegalArgumentException Thrown if parameters were invalid.
7478
*/
7579
public StringList(boolean isCaseSensitive, String... tokens) {
76-
7780
Objects.requireNonNull(tokens, "tokens must not be null");
7881

7982
if (tokens.length == 0) {
8083
throw new IllegalArgumentException("tokens must not be empty");
8184
}
8285

8386
this.tokens = new String[tokens.length];
84-
8587
for (int i = 0; i < tokens.length; i++) {
8688
this.tokens[i] = StringInterners.intern(tokens[i]);
8789
}
@@ -161,8 +163,11 @@ public boolean compareToIgnoreCase(StringList tokens) {
161163

162164
@Override
163165
public int hashCode() {
164-
// if lookup is too slow optimize this
165-
return StringUtil.toLowerCase(toString()).hashCode();
166+
if (hashCode == null) {
167+
// compute once and cache to save CPU cycles during use
168+
this.hashCode = StringUtil.toLowerCase(String.join(",", tokens)).hashCode();
169+
}
170+
return hashCode;
166171
}
167172

168173
@Override
@@ -184,21 +189,7 @@ public boolean equals(Object obj) {
184189
*/
185190
@Override
186191
public String toString() {
187-
StringBuilder string = new StringBuilder();
188-
189-
string.append('[');
190-
191-
for (int i = 0; i < size(); i++) {
192-
string.append(getToken(i));
193-
194-
if (i < size() - 1) {
195-
string.append(',');
196-
}
197-
}
198-
199-
string.append(']');
200-
201-
return string.toString();
192+
return Arrays.stream(tokens).collect(Collectors.joining(",", "[", "]"));
202193
}
203194

204195
/**

opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.junit.jupiter.api.BeforeAll;
3939
import org.junit.jupiter.api.Test;
4040

41+
import opennlp.tools.dictionary.Dictionary;
4142
import opennlp.tools.util.StringList;
4243
import opennlp.uima.AbstractTest;
4344
import opennlp.uima.util.CasUtil;
@@ -69,17 +70,14 @@ private static AnalysisEngine produceAE(String descName)
6970
public void testDictionaryWasLoaded() {
7071

7172
try {
72-
DictionaryResource dic = (DictionaryResource) AE.getResourceManager()
73-
.getResource("/opennlp.uima.Dictionary");
74-
// simple check if ordering always is the same...
75-
Assertions.assertEquals(
76-
"[[Berlin], [Stockholm], [New,York], [London], [Copenhagen], [Paris]]",
77-
dic.getDictionary().toString());
78-
// else we can do a simple test like this
79-
Assertions.assertEquals(6,
80-
dic.getDictionary().asStringSet().size(), "There should be six entries in the dictionary");
81-
Assertions.assertTrue(dic.getDictionary().contains(new StringList("London")),
82-
"London should be in the dictionary");
73+
final DictionaryResource dic = (DictionaryResource) AE.getResourceManager()
74+
.getResource("/opennlp.uima.Dictionary");
75+
final Dictionary d = dic.getDictionary();
76+
Assertions.assertNotNull(d);
77+
Assertions.assertEquals(6, d.asStringSet().size(),
78+
"There should be six entries in the dictionary");
79+
Assertions.assertTrue(d.contains(new StringList("London")),
80+
"London should be in the dictionary");
8381
} catch (Exception e) {
8482
Assertions.fail("Dictionary was not loaded.");
8583
}

0 commit comments

Comments
 (0)